1 /* $NetBSD: apprentice.c,v 1.26 2021/04/09 19:11:42 christos Exp $ */ 2 3 /* 4 * Copyright (c) Ian F. Darwin 1986-1995. 5 * Software written by Ian F. Darwin and others; 6 * maintained 1995-present by Christos Zoulas and others. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice immediately at the beginning of the file, without modification, 13 * this list of conditions, and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 /* 31 * apprentice - make one pass through /etc/magic, learning its secrets. 32 */ 33 34 #include "file.h" 35 36 #ifndef lint 37 #if 0 38 FILE_RCSID("@(#)$File: apprentice.c,v 1.301 2021/02/23 00:51:11 christos Exp $") 39 #else 40 __RCSID("$NetBSD: apprentice.c,v 1.26 2021/04/09 19:11:42 christos Exp $"); 41 #endif 42 #endif /* lint */ 43 44 #include "magic.h" 45 #include <stdlib.h> 46 #ifdef HAVE_UNISTD_H 47 #include <unistd.h> 48 #endif 49 #include <stddef.h> 50 #include <string.h> 51 #include <assert.h> 52 #include <ctype.h> 53 #include <fcntl.h> 54 #ifdef QUICK 55 #include <sys/mman.h> 56 #endif 57 #include <dirent.h> 58 #include <limits.h> 59 60 61 #define EATAB {while (isascii(CAST(unsigned char, *l)) && \ 62 isspace(CAST(unsigned char, *l))) ++l;} 63 #define LOWCASE(l) (isupper(CAST(unsigned char, l)) ? \ 64 tolower(CAST(unsigned char, l)) : (l)) 65 /* 66 * Work around a bug in headers on Digital Unix. 67 * At least confirmed for: OSF1 V4.0 878 68 */ 69 #if defined(__osf__) && defined(__DECC) 70 #ifdef MAP_FAILED 71 #undef MAP_FAILED 72 #endif 73 #endif 74 75 #ifndef MAP_FAILED 76 #define MAP_FAILED (void *) -1 77 #endif 78 79 #ifndef MAP_FILE 80 #define MAP_FILE 0 81 #endif 82 83 #define ALLOC_CHUNK CAST(size_t, 10) 84 #define ALLOC_INCR CAST(size_t, 200) 85 86 #define MAP_TYPE_USER 0 87 #define MAP_TYPE_MALLOC 1 88 #define MAP_TYPE_MMAP 2 89 90 struct magic_entry { 91 struct magic *mp; 92 uint32_t cont_count; 93 uint32_t max_count; 94 }; 95 96 struct magic_entry_set { 97 struct magic_entry *me; 98 uint32_t count; 99 uint32_t max; 100 }; 101 102 struct magic_map { 103 void *p; 104 size_t len; 105 int type; 106 struct magic *magic[MAGIC_SETS]; 107 uint32_t nmagic[MAGIC_SETS]; 108 }; 109 110 int file_formats[FILE_NAMES_SIZE]; 111 const size_t file_nformats = FILE_NAMES_SIZE; 112 const char *file_names[FILE_NAMES_SIZE]; 113 const size_t file_nnames = FILE_NAMES_SIZE; 114 115 private int getvalue(struct magic_set *ms, struct magic *, const char **, int); 116 private int hextoint(int); 117 private const char *getstr(struct magic_set *, struct magic *, const char *, 118 int); 119 private int parse(struct magic_set *, struct magic_entry *, const char *, 120 size_t, int); 121 private void eatsize(const char **); 122 private int apprentice_1(struct magic_set *, const char *, int); 123 private size_t apprentice_magic_strength(const struct magic *); 124 private int apprentice_sort(const void *, const void *); 125 private void apprentice_list(struct mlist *, int ); 126 private struct magic_map *apprentice_load(struct magic_set *, 127 const char *, int); 128 private struct mlist *mlist_alloc(void); 129 private void mlist_free_all(struct magic_set *); 130 private void mlist_free(struct mlist *); 131 private void byteswap(struct magic *, uint32_t); 132 private void bs1(struct magic *); 133 private uint16_t swap2(uint16_t); 134 private uint32_t swap4(uint32_t); 135 private uint64_t swap8(uint64_t); 136 private char *mkdbname(struct magic_set *, const char *, int); 137 private struct magic_map *apprentice_buf(struct magic_set *, struct magic *, 138 size_t); 139 private struct magic_map *apprentice_map(struct magic_set *, const char *); 140 private int check_buffer(struct magic_set *, struct magic_map *, const char *); 141 private void apprentice_unmap(struct magic_map *); 142 private int apprentice_compile(struct magic_set *, struct magic_map *, 143 const char *); 144 private int check_format_type(const char *, int, const char **); 145 private int check_format(struct magic_set *, struct magic *); 146 private int get_op(char); 147 private int parse_mime(struct magic_set *, struct magic_entry *, const char *, 148 size_t); 149 private int parse_strength(struct magic_set *, struct magic_entry *, 150 const char *, size_t); 151 private int parse_apple(struct magic_set *, struct magic_entry *, const char *, 152 size_t); 153 private int parse_ext(struct magic_set *, struct magic_entry *, const char *, 154 size_t); 155 156 157 private size_t magicsize = sizeof(struct magic); 158 159 private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 160 161 private struct { 162 const char *name; 163 size_t len; 164 int (*fun)(struct magic_set *, struct magic_entry *, const char *, 165 size_t); 166 } bang[] = { 167 #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name } 168 DECLARE_FIELD(mime), 169 DECLARE_FIELD(apple), 170 DECLARE_FIELD(ext), 171 DECLARE_FIELD(strength), 172 #undef DECLARE_FIELD 173 { NULL, 0, NULL } 174 }; 175 176 #ifdef COMPILE_ONLY 177 178 int main(int, char *[]); 179 180 int 181 main(int argc, char *argv[]) 182 { 183 int ret; 184 struct magic_set *ms; 185 char *progname; 186 187 if ((progname = strrchr(argv[0], '/')) != NULL) 188 progname++; 189 else 190 progname = argv[0]; 191 192 if (argc != 2) { 193 (void)fprintf(stderr, "Usage: %s file\n", progname); 194 return 1; 195 } 196 197 if ((ms = magic_open(MAGIC_CHECK)) == NULL) { 198 (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno)); 199 return 1; 200 } 201 ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0; 202 if (ret == 1) 203 (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms)); 204 magic_close(ms); 205 return ret; 206 } 207 #endif /* COMPILE_ONLY */ 208 209 struct type_tbl_s { 210 const char name[16]; 211 const size_t len; 212 const int type; 213 const int format; 214 }; 215 216 /* 217 * XXX - the actual Single UNIX Specification says that "long" means "long", 218 * as in the C data type, but we treat it as meaning "4-byte integer". 219 * Given that the OS X version of file 5.04 did the same, I guess that passes 220 * the actual test; having "long" be dependent on how big a "long" is on 221 * the machine running "file" is silly. 222 */ 223 static const struct type_tbl_s type_tbl[] = { 224 # define XX(s) s, (sizeof(s) - 1) 225 # define XX_NULL "", 0 226 { XX("invalid"), FILE_INVALID, FILE_FMT_NONE }, 227 { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, 228 { XX("short"), FILE_SHORT, FILE_FMT_NUM }, 229 { XX("default"), FILE_DEFAULT, FILE_FMT_NONE }, 230 { XX("long"), FILE_LONG, FILE_FMT_NUM }, 231 { XX("string"), FILE_STRING, FILE_FMT_STR }, 232 { XX("date"), FILE_DATE, FILE_FMT_STR }, 233 { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, 234 { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, 235 { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, 236 { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, 237 { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, 238 { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, 239 { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, 240 { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, 241 { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, 242 { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, 243 { XX("regex"), FILE_REGEX, FILE_FMT_STR }, 244 { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, 245 { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, 246 { XX("search"), FILE_SEARCH, FILE_FMT_STR }, 247 { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, 248 { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, 249 { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, 250 { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, 251 { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, 252 { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, 253 { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, 254 { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, 255 { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, 256 { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, 257 { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, 258 { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, 259 { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT }, 260 { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, 261 { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, 262 { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, 263 { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE }, 264 { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, 265 { XX("leid3"), FILE_LEID3, FILE_FMT_NUM }, 266 { XX("beid3"), FILE_BEID3, FILE_FMT_NUM }, 267 { XX("indirect"), FILE_INDIRECT, FILE_FMT_NUM }, 268 { XX("qwdate"), FILE_QWDATE, FILE_FMT_STR }, 269 { XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR }, 270 { XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR }, 271 { XX("name"), FILE_NAME, FILE_FMT_NONE }, 272 { XX("use"), FILE_USE, FILE_FMT_NONE }, 273 { XX("clear"), FILE_CLEAR, FILE_FMT_NONE }, 274 { XX("der"), FILE_DER, FILE_FMT_STR }, 275 { XX("guid"), FILE_GUID, FILE_FMT_STR }, 276 { XX("offset"), FILE_OFFSET, FILE_FMT_QUAD }, 277 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 278 }; 279 280 /* 281 * These are not types, and cannot be preceded by "u" to make them 282 * unsigned. 283 */ 284 static const struct type_tbl_s special_tbl[] = { 285 { XX("der"), FILE_DER, FILE_FMT_STR }, 286 { XX("name"), FILE_NAME, FILE_FMT_STR }, 287 { XX("use"), FILE_USE, FILE_FMT_STR }, 288 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 289 }; 290 # undef XX 291 # undef XX_NULL 292 293 private int 294 get_type(const struct type_tbl_s *tbl, const char *l, const char **t) 295 { 296 const struct type_tbl_s *p; 297 298 for (p = tbl; p->len; p++) { 299 if (strncmp(l, p->name, p->len) == 0) { 300 if (t) 301 *t = l + p->len; 302 break; 303 } 304 } 305 return p->type; 306 } 307 308 private off_t 309 maxoff_t(void) { 310 if (/*CONSTCOND*/sizeof(off_t) == sizeof(int)) 311 return CAST(off_t, INT_MAX); 312 if (/*CONSTCOND*/sizeof(off_t) == sizeof(long)) 313 return CAST(off_t, LONG_MAX); 314 return 0x7fffffff; 315 } 316 317 private int 318 get_standard_integer_type(const char *l, const char **t) 319 { 320 int type; 321 322 if (isalpha(CAST(unsigned char, l[1]))) { 323 switch (l[1]) { 324 case 'C': 325 /* "dC" and "uC" */ 326 type = FILE_BYTE; 327 break; 328 case 'S': 329 /* "dS" and "uS" */ 330 type = FILE_SHORT; 331 break; 332 case 'I': 333 case 'L': 334 /* 335 * "dI", "dL", "uI", and "uL". 336 * 337 * XXX - the actual Single UNIX Specification says 338 * that "L" means "long", as in the C data type, 339 * but we treat it as meaning "4-byte integer". 340 * Given that the OS X version of file 5.04 did 341 * the same, I guess that passes the actual SUS 342 * validation suite; having "dL" be dependent on 343 * how big a "long" is on the machine running 344 * "file" is silly. 345 */ 346 type = FILE_LONG; 347 break; 348 case 'Q': 349 /* "dQ" and "uQ" */ 350 type = FILE_QUAD; 351 break; 352 default: 353 /* "d{anything else}", "u{anything else}" */ 354 return FILE_INVALID; 355 } 356 l += 2; 357 } else if (isdigit(CAST(unsigned char, l[1]))) { 358 /* 359 * "d{num}" and "u{num}"; we only support {num} values 360 * of 1, 2, 4, and 8 - the Single UNIX Specification 361 * doesn't say anything about whether arbitrary 362 * values should be supported, but both the Solaris 10 363 * and OS X Mountain Lion versions of file passed the 364 * Single UNIX Specification validation suite, and 365 * neither of them support values bigger than 8 or 366 * non-power-of-2 values. 367 */ 368 if (isdigit(CAST(unsigned char, l[2]))) { 369 /* Multi-digit, so > 9 */ 370 return FILE_INVALID; 371 } 372 switch (l[1]) { 373 case '1': 374 type = FILE_BYTE; 375 break; 376 case '2': 377 type = FILE_SHORT; 378 break; 379 case '4': 380 type = FILE_LONG; 381 break; 382 case '8': 383 type = FILE_QUAD; 384 break; 385 default: 386 /* XXX - what about 3, 5, 6, or 7? */ 387 return FILE_INVALID; 388 } 389 l += 2; 390 } else { 391 /* 392 * "d" or "u" by itself. 393 */ 394 type = FILE_LONG; 395 ++l; 396 } 397 if (t) 398 *t = l; 399 return type; 400 } 401 402 private void 403 init_file_tables(void) 404 { 405 static int done = 0; 406 const struct type_tbl_s *p; 407 408 if (done) 409 return; 410 done++; 411 412 for (p = type_tbl; p->len; p++) { 413 assert(p->type < FILE_NAMES_SIZE); 414 file_names[p->type] = p->name; 415 file_formats[p->type] = p->format; 416 } 417 assert(p - type_tbl == FILE_NAMES_SIZE); 418 } 419 420 private int 421 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx) 422 { 423 struct mlist *ml; 424 425 mlp->map = NULL; 426 if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) 427 return -1; 428 429 ml->map = idx == 0 ? map : NULL; 430 ml->magic = map->magic[idx]; 431 ml->nmagic = map->nmagic[idx]; 432 433 mlp->prev->next = ml; 434 ml->prev = mlp->prev; 435 ml->next = mlp; 436 mlp->prev = ml; 437 return 0; 438 } 439 440 /* 441 * Handle one file or directory. 442 */ 443 private int 444 apprentice_1(struct magic_set *ms, const char *fn, int action) 445 { 446 struct magic_map *map; 447 #ifndef COMPILE_ONLY 448 struct mlist *ml; 449 size_t i; 450 #endif 451 452 if (magicsize != FILE_MAGICSIZE) { 453 file_error(ms, 0, "magic element size %lu != %lu", 454 CAST(unsigned long, sizeof(*map->magic[0])), 455 CAST(unsigned long, FILE_MAGICSIZE)); 456 return -1; 457 } 458 459 if (action == FILE_COMPILE) { 460 map = apprentice_load(ms, fn, action); 461 if (map == NULL) 462 return -1; 463 return apprentice_compile(ms, map, fn); 464 } 465 466 #ifndef COMPILE_ONLY 467 map = apprentice_map(ms, fn); 468 if (map == NULL) { 469 if (ms->flags & MAGIC_CHECK) 470 file_magwarn(ms, "using regular magic file `%s'", fn); 471 map = apprentice_load(ms, fn, action); 472 if (map == NULL) 473 return -1; 474 } 475 476 for (i = 0; i < MAGIC_SETS; i++) { 477 if (add_mlist(ms->mlist[i], map, i) == -1) { 478 /* failed to add to any list, free explicitly */ 479 if (i == 0) 480 apprentice_unmap(map); 481 else 482 mlist_free_all(ms); 483 file_oomem(ms, sizeof(*ml)); 484 return -1; 485 } 486 } 487 488 if (action == FILE_LIST) { 489 for (i = 0; i < MAGIC_SETS; i++) { 490 printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n", 491 i); 492 apprentice_list(ms->mlist[i], BINTEST); 493 printf("Text patterns:\n"); 494 apprentice_list(ms->mlist[i], TEXTTEST); 495 } 496 } 497 return 0; 498 #else 499 return 0; 500 #endif /* COMPILE_ONLY */ 501 } 502 503 protected void 504 file_ms_free(struct magic_set *ms) 505 { 506 size_t i; 507 if (ms == NULL) 508 return; 509 for (i = 0; i < MAGIC_SETS; i++) 510 mlist_free(ms->mlist[i]); 511 free(ms->o.pbuf); 512 free(ms->o.buf); 513 free(ms->c.li); 514 free(ms); 515 } 516 517 protected struct magic_set * 518 file_ms_alloc(int flags) 519 { 520 struct magic_set *ms; 521 size_t i, len; 522 523 if ((ms = CAST(struct magic_set *, calloc(CAST(size_t, 1u), 524 sizeof(struct magic_set)))) == NULL) 525 return NULL; 526 527 if (magic_setflags(ms, flags) == -1) { 528 errno = EINVAL; 529 goto free; 530 } 531 532 ms->o.buf = ms->o.pbuf = NULL; 533 ms->o.blen = 0; 534 len = (ms->c.len = 10) * sizeof(*ms->c.li); 535 536 if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL) 537 goto free; 538 539 ms->event_flags = 0; 540 ms->error = -1; 541 for (i = 0; i < MAGIC_SETS; i++) 542 ms->mlist[i] = NULL; 543 ms->file = "unknown"; 544 ms->line = 0; 545 ms->indir_max = FILE_INDIR_MAX; 546 ms->name_max = FILE_NAME_MAX; 547 ms->elf_shnum_max = FILE_ELF_SHNUM_MAX; 548 ms->elf_phnum_max = FILE_ELF_PHNUM_MAX; 549 ms->elf_notes_max = FILE_ELF_NOTES_MAX; 550 ms->regex_max = FILE_REGEX_MAX; 551 ms->bytes_max = FILE_BYTES_MAX; 552 ms->encoding_max = FILE_ENCODING_MAX; 553 return ms; 554 free: 555 free(ms); 556 return NULL; 557 } 558 559 private void 560 apprentice_unmap(struct magic_map *map) 561 { 562 size_t i; 563 if (map == NULL) 564 return; 565 566 switch (map->type) { 567 case MAP_TYPE_USER: 568 break; 569 case MAP_TYPE_MALLOC: 570 for (i = 0; i < MAGIC_SETS; i++) { 571 void *b = map->magic[i]; 572 void *p = map->p; 573 if (CAST(char *, b) >= CAST(char *, p) && 574 CAST(char *, b) <= CAST(char *, p) + map->len) 575 continue; 576 free(map->magic[i]); 577 } 578 free(map->p); 579 break; 580 #ifdef QUICK 581 case MAP_TYPE_MMAP: 582 if (map->p && map->p != MAP_FAILED) 583 (void)munmap(map->p, map->len); 584 break; 585 #endif 586 default: 587 abort(); 588 } 589 free(map); 590 } 591 592 private struct mlist * 593 mlist_alloc(void) 594 { 595 struct mlist *mlist; 596 if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) { 597 return NULL; 598 } 599 mlist->next = mlist->prev = mlist; 600 return mlist; 601 } 602 603 private void 604 mlist_free_all(struct magic_set *ms) 605 { 606 size_t i; 607 608 for (i = 0; i < MAGIC_SETS; i++) { 609 mlist_free(ms->mlist[i]); 610 ms->mlist[i] = NULL; 611 } 612 } 613 614 private void 615 mlist_free_one(struct mlist *ml) 616 { 617 if (ml->map) 618 apprentice_unmap(CAST(struct magic_map *, ml->map)); 619 free(ml); 620 } 621 622 private void 623 mlist_free(struct mlist *mlist) 624 { 625 struct mlist *ml, *next; 626 627 if (mlist == NULL) 628 return; 629 630 for (ml = mlist->next; ml != mlist;) { 631 next = ml->next; 632 mlist_free_one(ml); 633 ml = next; 634 } 635 mlist_free_one(mlist); 636 } 637 638 #ifndef COMPILE_ONLY 639 /* void **bufs: an array of compiled magic files */ 640 protected int 641 buffer_apprentice(struct magic_set *ms, struct magic **bufs, 642 size_t *sizes, size_t nbufs) 643 { 644 size_t i, j; 645 struct mlist *ml; 646 struct magic_map *map; 647 648 if (nbufs == 0) 649 return -1; 650 651 (void)file_reset(ms, 0); 652 653 init_file_tables(); 654 655 for (i = 0; i < MAGIC_SETS; i++) { 656 mlist_free(ms->mlist[i]); 657 if ((ms->mlist[i] = mlist_alloc()) == NULL) { 658 file_oomem(ms, sizeof(*ms->mlist[i])); 659 goto fail; 660 } 661 } 662 663 for (i = 0; i < nbufs; i++) { 664 map = apprentice_buf(ms, bufs[i], sizes[i]); 665 if (map == NULL) 666 goto fail; 667 668 for (j = 0; j < MAGIC_SETS; j++) { 669 if (add_mlist(ms->mlist[j], map, j) == -1) { 670 file_oomem(ms, sizeof(*ml)); 671 goto fail; 672 } 673 } 674 } 675 676 return 0; 677 fail: 678 mlist_free_all(ms); 679 return -1; 680 } 681 #endif 682 683 /* const char *fn: list of magic files and directories */ 684 protected int 685 file_apprentice(struct magic_set *ms, const char *fn, int action) 686 { 687 char *p, *mfn; 688 int fileerr, errs = -1; 689 size_t i, j; 690 691 (void)file_reset(ms, 0); 692 693 if ((fn = magic_getpath(fn, action)) == NULL) 694 return -1; 695 696 init_file_tables(); 697 698 if ((mfn = strdup(fn)) == NULL) { 699 file_oomem(ms, strlen(fn)); 700 return -1; 701 } 702 703 for (i = 0; i < MAGIC_SETS; i++) { 704 mlist_free(ms->mlist[i]); 705 if ((ms->mlist[i] = mlist_alloc()) == NULL) { 706 file_oomem(ms, sizeof(*ms->mlist[i])); 707 for (j = 0; j < i; j++) { 708 mlist_free(ms->mlist[j]); 709 ms->mlist[j] = NULL; 710 } 711 free(mfn); 712 return -1; 713 } 714 } 715 fn = mfn; 716 717 while (fn) { 718 p = strchr(fn, PATHSEP); 719 if (p) 720 *p++ = '\0'; 721 if (*fn == '\0') 722 break; 723 fileerr = apprentice_1(ms, fn, action); 724 errs = MAX(errs, fileerr); 725 fn = p; 726 } 727 728 free(mfn); 729 730 if (errs == -1) { 731 for (i = 0; i < MAGIC_SETS; i++) { 732 mlist_free(ms->mlist[i]); 733 ms->mlist[i] = NULL; 734 } 735 file_error(ms, 0, "could not find any valid magic files!"); 736 return -1; 737 } 738 739 #if 0 740 /* 741 * Always leave the database loaded 742 */ 743 if (action == FILE_LOAD) 744 return 0; 745 746 for (i = 0; i < MAGIC_SETS; i++) { 747 mlist_free(ms->mlist[i]); 748 ms->mlist[i] = NULL; 749 } 750 #endif 751 752 switch (action) { 753 case FILE_LOAD: 754 case FILE_COMPILE: 755 case FILE_CHECK: 756 case FILE_LIST: 757 return 0; 758 default: 759 file_error(ms, 0, "Invalid action %d", action); 760 return -1; 761 } 762 } 763 764 /* 765 * Compute the real length of a magic expression, for the purposes 766 * of determining how "strong" a magic expression is (approximating 767 * how specific its matches are): 768 * - magic characters count 0 unless escaped. 769 * - [] expressions count 1 770 * - {} expressions count 0 771 * - regular characters or escaped magic characters count 1 772 * - 0 length expressions count as one 773 */ 774 private size_t 775 nonmagic(const char *str) 776 { 777 const char *p; 778 size_t rv = 0; 779 780 for (p = str; *p; p++) 781 switch (*p) { 782 case '\\': /* Escaped anything counts 1 */ 783 if (!*++p) 784 p--; 785 rv++; 786 continue; 787 case '?': /* Magic characters count 0 */ 788 case '*': 789 case '.': 790 case '+': 791 case '^': 792 case '$': 793 continue; 794 case '[': /* Bracketed expressions count 1 the ']' */ 795 while (*p && *p != ']') 796 p++; 797 p--; 798 continue; 799 case '{': /* Braced expressions count 0 */ 800 while (*p && *p != '}') 801 p++; 802 if (!*p) 803 p--; 804 continue; 805 default: /* Anything else counts 1 */ 806 rv++; 807 continue; 808 } 809 810 return rv == 0 ? 1 : rv; /* Return at least 1 */ 811 } 812 813 814 private size_t 815 typesize(int type) 816 { 817 switch (type) { 818 case FILE_BYTE: 819 return 1; 820 821 case FILE_SHORT: 822 case FILE_LESHORT: 823 case FILE_BESHORT: 824 return 2; 825 826 case FILE_LONG: 827 case FILE_LELONG: 828 case FILE_BELONG: 829 case FILE_MELONG: 830 return 4; 831 832 case FILE_DATE: 833 case FILE_LEDATE: 834 case FILE_BEDATE: 835 case FILE_MEDATE: 836 case FILE_LDATE: 837 case FILE_LELDATE: 838 case FILE_BELDATE: 839 case FILE_MELDATE: 840 case FILE_FLOAT: 841 case FILE_BEFLOAT: 842 case FILE_LEFLOAT: 843 return 4; 844 845 case FILE_QUAD: 846 case FILE_BEQUAD: 847 case FILE_LEQUAD: 848 case FILE_QDATE: 849 case FILE_LEQDATE: 850 case FILE_BEQDATE: 851 case FILE_QLDATE: 852 case FILE_LEQLDATE: 853 case FILE_BEQLDATE: 854 case FILE_QWDATE: 855 case FILE_LEQWDATE: 856 case FILE_BEQWDATE: 857 case FILE_DOUBLE: 858 case FILE_BEDOUBLE: 859 case FILE_LEDOUBLE: 860 case FILE_OFFSET: 861 return 8; 862 863 case FILE_GUID: 864 return 16; 865 866 default: 867 return FILE_BADSIZE; 868 } 869 } 870 871 /* 872 * Get weight of this magic entry, for sorting purposes. 873 */ 874 private size_t 875 apprentice_magic_strength(const struct magic *m) 876 { 877 #define MULT 10U 878 size_t ts, v; 879 ssize_t val = 2 * MULT; /* baseline strength */ 880 881 switch (m->type) { 882 case FILE_DEFAULT: /* make sure this sorts last */ 883 if (m->factor_op != FILE_FACTOR_OP_NONE) 884 abort(); 885 return 0; 886 887 case FILE_BYTE: 888 case FILE_SHORT: 889 case FILE_LESHORT: 890 case FILE_BESHORT: 891 case FILE_LONG: 892 case FILE_LELONG: 893 case FILE_BELONG: 894 case FILE_MELONG: 895 case FILE_DATE: 896 case FILE_LEDATE: 897 case FILE_BEDATE: 898 case FILE_MEDATE: 899 case FILE_LDATE: 900 case FILE_LELDATE: 901 case FILE_BELDATE: 902 case FILE_MELDATE: 903 case FILE_FLOAT: 904 case FILE_BEFLOAT: 905 case FILE_LEFLOAT: 906 case FILE_QUAD: 907 case FILE_BEQUAD: 908 case FILE_LEQUAD: 909 case FILE_QDATE: 910 case FILE_LEQDATE: 911 case FILE_BEQDATE: 912 case FILE_QLDATE: 913 case FILE_LEQLDATE: 914 case FILE_BEQLDATE: 915 case FILE_QWDATE: 916 case FILE_LEQWDATE: 917 case FILE_BEQWDATE: 918 case FILE_DOUBLE: 919 case FILE_BEDOUBLE: 920 case FILE_LEDOUBLE: 921 case FILE_GUID: 922 case FILE_OFFSET: 923 ts = typesize(m->type); 924 if (ts == FILE_BADSIZE) 925 abort(); 926 val += ts * MULT; 927 break; 928 929 case FILE_PSTRING: 930 case FILE_STRING: 931 val += m->vallen * MULT; 932 break; 933 934 case FILE_BESTRING16: 935 case FILE_LESTRING16: 936 val += m->vallen * MULT / 2; 937 break; 938 939 case FILE_SEARCH: 940 if (m->vallen == 0) 941 break; 942 val += m->vallen * MAX(MULT / m->vallen, 1); 943 break; 944 945 case FILE_REGEX: 946 v = nonmagic(m->value.s); 947 val += v * MAX(MULT / v, 1); 948 break; 949 950 case FILE_INDIRECT: 951 case FILE_NAME: 952 case FILE_USE: 953 break; 954 955 case FILE_DER: 956 val += MULT; 957 break; 958 959 default: 960 (void)fprintf(stderr, "Bad type %d\n", m->type); 961 abort(); 962 } 963 964 switch (m->reln) { 965 case 'x': /* matches anything penalize */ 966 case '!': /* matches almost anything penalize */ 967 val = 0; 968 break; 969 970 case '=': /* Exact match, prefer */ 971 val += MULT; 972 break; 973 974 case '>': 975 case '<': /* comparison match reduce strength */ 976 val -= 2 * MULT; 977 break; 978 979 case '^': 980 case '&': /* masking bits, we could count them too */ 981 val -= MULT; 982 break; 983 984 default: 985 (void)fprintf(stderr, "Bad relation %c\n", m->reln); 986 abort(); 987 } 988 989 switch (m->factor_op) { 990 case FILE_FACTOR_OP_NONE: 991 break; 992 case FILE_FACTOR_OP_PLUS: 993 val += m->factor; 994 break; 995 case FILE_FACTOR_OP_MINUS: 996 val -= m->factor; 997 break; 998 case FILE_FACTOR_OP_TIMES: 999 val *= m->factor; 1000 break; 1001 case FILE_FACTOR_OP_DIV: 1002 val /= m->factor; 1003 break; 1004 default: 1005 abort(); 1006 } 1007 1008 if (val <= 0) /* ensure we only return 0 for FILE_DEFAULT */ 1009 val = 1; 1010 1011 /* 1012 * Magic entries with no description get a bonus because they depend 1013 * on subsequent magic entries to print something. 1014 */ 1015 if (m->desc[0] == '\0') 1016 val++; 1017 return val; 1018 } 1019 1020 /* 1021 * Sort callback for sorting entries by "strength" (basically length) 1022 */ 1023 private int 1024 apprentice_sort(const void *a, const void *b) 1025 { 1026 const struct magic_entry *ma = CAST(const struct magic_entry *, a); 1027 const struct magic_entry *mb = CAST(const struct magic_entry *, b); 1028 size_t sa = apprentice_magic_strength(ma->mp); 1029 size_t sb = apprentice_magic_strength(mb->mp); 1030 if (sa == sb) 1031 return 0; 1032 else if (sa > sb) 1033 return -1; 1034 else 1035 return 1; 1036 } 1037 1038 /* 1039 * Shows sorted patterns list in the order which is used for the matching 1040 */ 1041 private void 1042 apprentice_list(struct mlist *mlist, int mode) 1043 { 1044 uint32_t magindex = 0; 1045 struct mlist *ml; 1046 for (ml = mlist->next; ml != mlist; ml = ml->next) { 1047 for (magindex = 0; magindex < ml->nmagic; magindex++) { 1048 struct magic *m = &ml->magic[magindex]; 1049 if ((m->flag & mode) != mode) { 1050 /* Skip sub-tests */ 1051 while (magindex + 1 < ml->nmagic && 1052 ml->magic[magindex + 1].cont_level != 0) 1053 ++magindex; 1054 continue; /* Skip to next top-level test*/ 1055 } 1056 1057 /* 1058 * Try to iterate over the tree until we find item with 1059 * description/mimetype. 1060 */ 1061 while (magindex + 1 < ml->nmagic && 1062 ml->magic[magindex + 1].cont_level != 0 && 1063 *ml->magic[magindex].desc == '\0' && 1064 *ml->magic[magindex].mimetype == '\0') 1065 magindex++; 1066 1067 printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n", 1068 apprentice_magic_strength(m), 1069 ml->magic[magindex].lineno, 1070 ml->magic[magindex].desc, 1071 ml->magic[magindex].mimetype); 1072 } 1073 } 1074 } 1075 1076 private void 1077 set_test_type(struct magic *mstart, struct magic *m) 1078 { 1079 switch (m->type) { 1080 case FILE_BYTE: 1081 case FILE_SHORT: 1082 case FILE_LONG: 1083 case FILE_DATE: 1084 case FILE_BESHORT: 1085 case FILE_BELONG: 1086 case FILE_BEDATE: 1087 case FILE_LESHORT: 1088 case FILE_LELONG: 1089 case FILE_LEDATE: 1090 case FILE_LDATE: 1091 case FILE_BELDATE: 1092 case FILE_LELDATE: 1093 case FILE_MEDATE: 1094 case FILE_MELDATE: 1095 case FILE_MELONG: 1096 case FILE_QUAD: 1097 case FILE_LEQUAD: 1098 case FILE_BEQUAD: 1099 case FILE_QDATE: 1100 case FILE_LEQDATE: 1101 case FILE_BEQDATE: 1102 case FILE_QLDATE: 1103 case FILE_LEQLDATE: 1104 case FILE_BEQLDATE: 1105 case FILE_QWDATE: 1106 case FILE_LEQWDATE: 1107 case FILE_BEQWDATE: 1108 case FILE_FLOAT: 1109 case FILE_BEFLOAT: 1110 case FILE_LEFLOAT: 1111 case FILE_DOUBLE: 1112 case FILE_BEDOUBLE: 1113 case FILE_LEDOUBLE: 1114 case FILE_DER: 1115 case FILE_GUID: 1116 case FILE_OFFSET: 1117 mstart->flag |= BINTEST; 1118 break; 1119 case FILE_STRING: 1120 case FILE_PSTRING: 1121 case FILE_BESTRING16: 1122 case FILE_LESTRING16: 1123 /* Allow text overrides */ 1124 if (mstart->str_flags & STRING_TEXTTEST) 1125 mstart->flag |= TEXTTEST; 1126 else 1127 mstart->flag |= BINTEST; 1128 break; 1129 case FILE_REGEX: 1130 case FILE_SEARCH: 1131 /* Check for override */ 1132 if (mstart->str_flags & STRING_BINTEST) 1133 mstart->flag |= BINTEST; 1134 if (mstart->str_flags & STRING_TEXTTEST) 1135 mstart->flag |= TEXTTEST; 1136 1137 if (mstart->flag & (TEXTTEST|BINTEST)) 1138 break; 1139 1140 /* binary test if pattern is not text */ 1141 if (file_looks_utf8(m->value.us, CAST(size_t, m->vallen), NULL, 1142 NULL) <= 0) 1143 mstart->flag |= BINTEST; 1144 else 1145 mstart->flag |= TEXTTEST; 1146 break; 1147 case FILE_DEFAULT: 1148 /* can't deduce anything; we shouldn't see this at the 1149 top level anyway */ 1150 break; 1151 case FILE_INVALID: 1152 default: 1153 /* invalid search type, but no need to complain here */ 1154 break; 1155 } 1156 } 1157 1158 private int 1159 addentry(struct magic_set *ms, struct magic_entry *me, 1160 struct magic_entry_set *mset) 1161 { 1162 size_t i = me->mp->type == FILE_NAME ? 1 : 0; 1163 if (mset[i].count == mset[i].max) { 1164 struct magic_entry *mp; 1165 1166 mset[i].max += ALLOC_INCR; 1167 if ((mp = CAST(struct magic_entry *, 1168 realloc(mset[i].me, sizeof(*mp) * mset[i].max))) == 1169 NULL) { 1170 file_oomem(ms, sizeof(*mp) * mset[i].max); 1171 return -1; 1172 } 1173 (void)memset(&mp[mset[i].count], 0, sizeof(*mp) * 1174 ALLOC_INCR); 1175 mset[i].me = mp; 1176 } 1177 mset[i].me[mset[i].count++] = *me; 1178 memset(me, 0, sizeof(*me)); 1179 return 0; 1180 } 1181 1182 /* 1183 * Load and parse one file. 1184 */ 1185 private void 1186 load_1(struct magic_set *ms, int action, const char *fn, int *errs, 1187 struct magic_entry_set *mset) 1188 { 1189 size_t lineno = 0, llen = 0; 1190 char *line = NULL; 1191 ssize_t len; 1192 struct magic_entry me; 1193 1194 FILE *f = fopen(ms->file = fn, "r"); 1195 if (f == NULL) { 1196 if (errno != ENOENT) 1197 file_error(ms, errno, "cannot read magic file `%s'", 1198 fn); 1199 (*errs)++; 1200 return; 1201 } 1202 1203 memset(&me, 0, sizeof(me)); 1204 /* read and parse this file */ 1205 for (ms->line = 1; (len = getline(&line, &llen, f)) != -1; 1206 ms->line++) { 1207 if (len == 0) /* null line, garbage, etc */ 1208 continue; 1209 if (line[len - 1] == '\n') { 1210 lineno++; 1211 line[len - 1] = '\0'; /* delete newline */ 1212 } 1213 switch (line[0]) { 1214 case '\0': /* empty, do not parse */ 1215 case '#': /* comment, do not parse */ 1216 continue; 1217 case '!': 1218 if (line[1] == ':') { 1219 size_t i; 1220 1221 for (i = 0; bang[i].name != NULL; i++) { 1222 if (CAST(size_t, len - 2) > bang[i].len && 1223 memcmp(bang[i].name, line + 2, 1224 bang[i].len) == 0) 1225 break; 1226 } 1227 if (bang[i].name == NULL) { 1228 file_error(ms, 0, 1229 "Unknown !: entry `%s'", line); 1230 (*errs)++; 1231 continue; 1232 } 1233 if (me.mp == NULL) { 1234 file_error(ms, 0, 1235 "No current entry for :!%s type", 1236 bang[i].name); 1237 (*errs)++; 1238 continue; 1239 } 1240 if ((*bang[i].fun)(ms, &me, 1241 line + bang[i].len + 2, 1242 len - bang[i].len - 2) != 0) { 1243 (*errs)++; 1244 continue; 1245 } 1246 continue; 1247 } 1248 /*FALLTHROUGH*/ 1249 default: 1250 again: 1251 switch (parse(ms, &me, line, lineno, action)) { 1252 case 0: 1253 continue; 1254 case 1: 1255 (void)addentry(ms, &me, mset); 1256 goto again; 1257 default: 1258 (*errs)++; 1259 break; 1260 } 1261 } 1262 } 1263 if (me.mp) 1264 (void)addentry(ms, &me, mset); 1265 free(line); 1266 (void)fclose(f); 1267 } 1268 1269 /* 1270 * parse a file or directory of files 1271 * const char *fn: name of magic file or directory 1272 */ 1273 private int 1274 cmpstrp(const void *p1, const void *p2) 1275 { 1276 return strcmp(*RCAST(char *const *, p1), *RCAST(char *const *, p2)); 1277 } 1278 1279 1280 private uint32_t 1281 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1282 uint32_t starttest) 1283 { 1284 static const char text[] = "text"; 1285 static const char binary[] = "binary"; 1286 static const size_t len = sizeof(text); 1287 1288 uint32_t i = starttest; 1289 1290 do { 1291 set_test_type(me[starttest].mp, me[i].mp); 1292 if ((ms->flags & MAGIC_DEBUG) == 0) 1293 continue; 1294 (void)fprintf(stderr, "%s%s%s: %s\n", 1295 me[i].mp->mimetype, 1296 me[i].mp->mimetype[0] == '\0' ? "" : "; ", 1297 me[i].mp->desc[0] ? me[i].mp->desc : "(no description)", 1298 me[i].mp->flag & BINTEST ? binary : text); 1299 if (me[i].mp->flag & BINTEST) { 1300 char *p = strstr(me[i].mp->desc, text); 1301 if (p && (p == me[i].mp->desc || 1302 isspace(CAST(unsigned char, p[-1]))) && 1303 (p + len - me[i].mp->desc == MAXstring 1304 || (p[len] == '\0' || 1305 isspace(CAST(unsigned char, p[len]))))) 1306 (void)fprintf(stderr, "*** Possible " 1307 "binary test for text type\n"); 1308 } 1309 } while (++i < nme && me[i].mp->cont_level != 0); 1310 return i; 1311 } 1312 1313 private void 1314 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme) 1315 { 1316 uint32_t i; 1317 for (i = 0; i < nme; i++) { 1318 if (me[i].mp->cont_level == 0 && 1319 me[i].mp->type == FILE_DEFAULT) { 1320 while (++i < nme) 1321 if (me[i].mp->cont_level == 0) 1322 break; 1323 if (i != nme) { 1324 /* XXX - Ugh! */ 1325 ms->line = me[i].mp->lineno; 1326 file_magwarn(ms, 1327 "level 0 \"default\" did not sort last"); 1328 } 1329 return; 1330 } 1331 } 1332 } 1333 1334 private int 1335 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1336 struct magic **ma, uint32_t *nma) 1337 { 1338 uint32_t i, mentrycount = 0; 1339 size_t slen; 1340 1341 for (i = 0; i < nme; i++) 1342 mentrycount += me[i].cont_count; 1343 1344 slen = sizeof(**ma) * mentrycount; 1345 if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) { 1346 file_oomem(ms, slen); 1347 return -1; 1348 } 1349 1350 mentrycount = 0; 1351 for (i = 0; i < nme; i++) { 1352 (void)memcpy(*ma + mentrycount, me[i].mp, 1353 me[i].cont_count * sizeof(**ma)); 1354 mentrycount += me[i].cont_count; 1355 } 1356 *nma = mentrycount; 1357 return 0; 1358 } 1359 1360 private void 1361 magic_entry_free(struct magic_entry *me, uint32_t nme) 1362 { 1363 uint32_t i; 1364 if (me == NULL) 1365 return; 1366 for (i = 0; i < nme; i++) 1367 free(me[i].mp); 1368 free(me); 1369 } 1370 1371 private struct magic_map * 1372 apprentice_load(struct magic_set *ms, const char *fn, int action) 1373 { 1374 int errs = 0; 1375 uint32_t i, j; 1376 size_t files = 0, maxfiles = 0; 1377 char **filearr = NULL, *mfn; 1378 struct stat st; 1379 struct magic_map *map; 1380 struct magic_entry_set mset[MAGIC_SETS]; 1381 DIR *dir; 1382 struct dirent *d; 1383 1384 memset(mset, 0, sizeof(mset)); 1385 ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ 1386 1387 1388 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) 1389 { 1390 file_oomem(ms, sizeof(*map)); 1391 return NULL; 1392 } 1393 map->type = MAP_TYPE_MALLOC; 1394 1395 /* print silly verbose header for USG compat. */ 1396 if (action == FILE_CHECK) 1397 (void)fprintf(stderr, "%s\n", usg_hdr); 1398 1399 /* load directory or file */ 1400 if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { 1401 dir = opendir(fn); 1402 if (!dir) { 1403 errs++; 1404 goto out; 1405 } 1406 while ((d = readdir(dir)) != NULL) { 1407 if (d->d_name[0] == '.') 1408 continue; 1409 if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) { 1410 file_oomem(ms, 1411 strlen(fn) + strlen(d->d_name) + 2); 1412 errs++; 1413 closedir(dir); 1414 goto out; 1415 } 1416 if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) { 1417 free(mfn); 1418 continue; 1419 } 1420 if (files >= maxfiles) { 1421 size_t mlen; 1422 char **nfilearr; 1423 maxfiles = (maxfiles + 1) * 2; 1424 mlen = maxfiles * sizeof(*filearr); 1425 if ((nfilearr = CAST(char **, 1426 realloc(filearr, mlen))) == NULL) { 1427 file_oomem(ms, mlen); 1428 free(mfn); 1429 closedir(dir); 1430 errs++; 1431 goto out; 1432 } 1433 filearr = nfilearr; 1434 } 1435 filearr[files++] = mfn; 1436 } 1437 closedir(dir); 1438 if (filearr) { 1439 qsort(filearr, files, sizeof(*filearr), cmpstrp); 1440 for (i = 0; i < files; i++) { 1441 load_1(ms, action, filearr[i], &errs, mset); 1442 free(filearr[i]); 1443 } 1444 free(filearr); 1445 filearr = NULL; 1446 } 1447 } else 1448 load_1(ms, action, fn, &errs, mset); 1449 if (errs) 1450 goto out; 1451 1452 for (j = 0; j < MAGIC_SETS; j++) { 1453 /* Set types of tests */ 1454 for (i = 0; i < mset[j].count; ) { 1455 if (mset[j].me[i].mp->cont_level != 0) { 1456 i++; 1457 continue; 1458 } 1459 i = set_text_binary(ms, mset[j].me, mset[j].count, i); 1460 } 1461 if (mset[j].me) 1462 qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me), 1463 apprentice_sort); 1464 1465 /* 1466 * Make sure that any level 0 "default" line is last 1467 * (if one exists). 1468 */ 1469 set_last_default(ms, mset[j].me, mset[j].count); 1470 1471 /* coalesce per file arrays into a single one, if needed */ 1472 if (mset[j].count == 0) 1473 continue; 1474 1475 if (coalesce_entries(ms, mset[j].me, mset[j].count, 1476 &map->magic[j], &map->nmagic[j]) == -1) { 1477 errs++; 1478 goto out; 1479 } 1480 } 1481 1482 out: 1483 free(filearr); 1484 for (j = 0; j < MAGIC_SETS; j++) 1485 magic_entry_free(mset[j].me, mset[j].count); 1486 1487 if (errs) { 1488 apprentice_unmap(map); 1489 return NULL; 1490 } 1491 return map; 1492 } 1493 1494 /* 1495 * extend the sign bit if the comparison is to be signed 1496 */ 1497 protected uint64_t 1498 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) 1499 { 1500 if (!(m->flag & UNSIGNED)) { 1501 switch(m->type) { 1502 /* 1503 * Do not remove the casts below. They are 1504 * vital. When later compared with the data, 1505 * the sign extension must have happened. 1506 */ 1507 case FILE_BYTE: 1508 v = CAST(signed char, v); 1509 break; 1510 case FILE_SHORT: 1511 case FILE_BESHORT: 1512 case FILE_LESHORT: 1513 v = CAST(short, v); 1514 break; 1515 case FILE_DATE: 1516 case FILE_BEDATE: 1517 case FILE_LEDATE: 1518 case FILE_MEDATE: 1519 case FILE_LDATE: 1520 case FILE_BELDATE: 1521 case FILE_LELDATE: 1522 case FILE_MELDATE: 1523 case FILE_LONG: 1524 case FILE_BELONG: 1525 case FILE_LELONG: 1526 case FILE_MELONG: 1527 case FILE_FLOAT: 1528 case FILE_BEFLOAT: 1529 case FILE_LEFLOAT: 1530 v = CAST(int32_t, v); 1531 break; 1532 case FILE_QUAD: 1533 case FILE_BEQUAD: 1534 case FILE_LEQUAD: 1535 case FILE_QDATE: 1536 case FILE_QLDATE: 1537 case FILE_QWDATE: 1538 case FILE_BEQDATE: 1539 case FILE_BEQLDATE: 1540 case FILE_BEQWDATE: 1541 case FILE_LEQDATE: 1542 case FILE_LEQLDATE: 1543 case FILE_LEQWDATE: 1544 case FILE_DOUBLE: 1545 case FILE_BEDOUBLE: 1546 case FILE_LEDOUBLE: 1547 case FILE_OFFSET: 1548 v = CAST(int64_t, v); 1549 break; 1550 case FILE_STRING: 1551 case FILE_PSTRING: 1552 case FILE_BESTRING16: 1553 case FILE_LESTRING16: 1554 case FILE_REGEX: 1555 case FILE_SEARCH: 1556 case FILE_DEFAULT: 1557 case FILE_INDIRECT: 1558 case FILE_NAME: 1559 case FILE_USE: 1560 case FILE_CLEAR: 1561 case FILE_DER: 1562 case FILE_GUID: 1563 break; 1564 default: 1565 if (ms->flags & MAGIC_CHECK) 1566 file_magwarn(ms, "cannot happen: m->type=%d\n", 1567 m->type); 1568 return FILE_BADSIZE; 1569 } 1570 } 1571 return v; 1572 } 1573 1574 private int 1575 string_modifier_check(struct magic_set *ms, struct magic *m) 1576 { 1577 if ((ms->flags & MAGIC_CHECK) == 0) 1578 return 0; 1579 1580 if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) && 1581 (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) { 1582 file_magwarn(ms, 1583 "'/BHhLl' modifiers are only allowed for pascal strings\n"); 1584 return -1; 1585 } 1586 switch (m->type) { 1587 case FILE_BESTRING16: 1588 case FILE_LESTRING16: 1589 if (m->str_flags != 0) { 1590 file_magwarn(ms, 1591 "no modifiers allowed for 16-bit strings\n"); 1592 return -1; 1593 } 1594 break; 1595 case FILE_STRING: 1596 case FILE_PSTRING: 1597 if ((m->str_flags & REGEX_OFFSET_START) != 0) { 1598 file_magwarn(ms, 1599 "'/%c' only allowed on regex and search\n", 1600 CHAR_REGEX_OFFSET_START); 1601 return -1; 1602 } 1603 break; 1604 case FILE_SEARCH: 1605 if (m->str_range == 0) { 1606 file_magwarn(ms, 1607 "missing range; defaulting to %d\n", 1608 STRING_DEFAULT_RANGE); 1609 m->str_range = STRING_DEFAULT_RANGE; 1610 return -1; 1611 } 1612 break; 1613 case FILE_REGEX: 1614 if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) { 1615 file_magwarn(ms, "'/%c' not allowed on regex\n", 1616 CHAR_COMPACT_WHITESPACE); 1617 return -1; 1618 } 1619 if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) { 1620 file_magwarn(ms, "'/%c' not allowed on regex\n", 1621 CHAR_COMPACT_OPTIONAL_WHITESPACE); 1622 return -1; 1623 } 1624 break; 1625 default: 1626 file_magwarn(ms, "coding error: m->type=%d\n", 1627 m->type); 1628 return -1; 1629 } 1630 return 0; 1631 } 1632 1633 private int 1634 get_op(char c) 1635 { 1636 switch (c) { 1637 case '&': 1638 return FILE_OPAND; 1639 case '|': 1640 return FILE_OPOR; 1641 case '^': 1642 return FILE_OPXOR; 1643 case '+': 1644 return FILE_OPADD; 1645 case '-': 1646 return FILE_OPMINUS; 1647 case '*': 1648 return FILE_OPMULTIPLY; 1649 case '/': 1650 return FILE_OPDIVIDE; 1651 case '%': 1652 return FILE_OPMODULO; 1653 default: 1654 return -1; 1655 } 1656 } 1657 1658 #ifdef ENABLE_CONDITIONALS 1659 private int 1660 get_cond(const char *l, const char **t) 1661 { 1662 static const struct cond_tbl_s { 1663 char name[8]; 1664 size_t len; 1665 int cond; 1666 } cond_tbl[] = { 1667 { "if", 2, COND_IF }, 1668 { "elif", 4, COND_ELIF }, 1669 { "else", 4, COND_ELSE }, 1670 { "", 0, COND_NONE }, 1671 }; 1672 const struct cond_tbl_s *p; 1673 1674 for (p = cond_tbl; p->len; p++) { 1675 if (strncmp(l, p->name, p->len) == 0 && 1676 isspace(CAST(unsigned char, l[p->len]))) { 1677 if (t) 1678 *t = l + p->len; 1679 break; 1680 } 1681 } 1682 return p->cond; 1683 } 1684 1685 private int 1686 check_cond(struct magic_set *ms, int cond, uint32_t cont_level) 1687 { 1688 int last_cond; 1689 last_cond = ms->c.li[cont_level].last_cond; 1690 1691 switch (cond) { 1692 case COND_IF: 1693 if (last_cond != COND_NONE && last_cond != COND_ELIF) { 1694 if (ms->flags & MAGIC_CHECK) 1695 file_magwarn(ms, "syntax error: `if'"); 1696 return -1; 1697 } 1698 last_cond = COND_IF; 1699 break; 1700 1701 case COND_ELIF: 1702 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1703 if (ms->flags & MAGIC_CHECK) 1704 file_magwarn(ms, "syntax error: `elif'"); 1705 return -1; 1706 } 1707 last_cond = COND_ELIF; 1708 break; 1709 1710 case COND_ELSE: 1711 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1712 if (ms->flags & MAGIC_CHECK) 1713 file_magwarn(ms, "syntax error: `else'"); 1714 return -1; 1715 } 1716 last_cond = COND_NONE; 1717 break; 1718 1719 case COND_NONE: 1720 last_cond = COND_NONE; 1721 break; 1722 } 1723 1724 ms->c.li[cont_level].last_cond = last_cond; 1725 return 0; 1726 } 1727 #endif /* ENABLE_CONDITIONALS */ 1728 1729 private int 1730 parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp) 1731 { 1732 const char *l = *lp; 1733 1734 while (!isspace(CAST(unsigned char, *++l))) 1735 switch (*l) { 1736 case CHAR_INDIRECT_RELATIVE: 1737 m->str_flags |= INDIRECT_RELATIVE; 1738 break; 1739 default: 1740 if (ms->flags & MAGIC_CHECK) 1741 file_magwarn(ms, "indirect modifier `%c' " 1742 "invalid", *l); 1743 *lp = l; 1744 return -1; 1745 } 1746 *lp = l; 1747 return 0; 1748 } 1749 1750 private void 1751 parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp, 1752 int op) 1753 { 1754 const char *l = *lp; 1755 char *t; 1756 uint64_t val; 1757 1758 ++l; 1759 m->mask_op |= op; 1760 val = CAST(uint64_t, strtoull(l, &t, 0)); 1761 l = t; 1762 m->num_mask = file_signextend(ms, m, val); 1763 eatsize(&l); 1764 *lp = l; 1765 } 1766 1767 private int 1768 parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp) 1769 { 1770 const char *l = *lp; 1771 char *t; 1772 int have_range = 0; 1773 1774 while (!isspace(CAST(unsigned char, *++l))) { 1775 switch (*l) { 1776 case '0': case '1': case '2': 1777 case '3': case '4': case '5': 1778 case '6': case '7': case '8': 1779 case '9': 1780 if (have_range && (ms->flags & MAGIC_CHECK)) 1781 file_magwarn(ms, "multiple ranges"); 1782 have_range = 1; 1783 m->str_range = CAST(uint32_t, strtoul(l, &t, 0)); 1784 if (m->str_range == 0) 1785 file_magwarn(ms, "zero range"); 1786 l = t - 1; 1787 break; 1788 case CHAR_COMPACT_WHITESPACE: 1789 m->str_flags |= STRING_COMPACT_WHITESPACE; 1790 break; 1791 case CHAR_COMPACT_OPTIONAL_WHITESPACE: 1792 m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE; 1793 break; 1794 case CHAR_IGNORE_LOWERCASE: 1795 m->str_flags |= STRING_IGNORE_LOWERCASE; 1796 break; 1797 case CHAR_IGNORE_UPPERCASE: 1798 m->str_flags |= STRING_IGNORE_UPPERCASE; 1799 break; 1800 case CHAR_REGEX_OFFSET_START: 1801 m->str_flags |= REGEX_OFFSET_START; 1802 break; 1803 case CHAR_BINTEST: 1804 m->str_flags |= STRING_BINTEST; 1805 break; 1806 case CHAR_TEXTTEST: 1807 m->str_flags |= STRING_TEXTTEST; 1808 break; 1809 case CHAR_TRIM: 1810 m->str_flags |= STRING_TRIM; 1811 break; 1812 case CHAR_PSTRING_1_LE: 1813 #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a) 1814 if (m->type != FILE_PSTRING) 1815 goto bad; 1816 SET_LENGTH(PSTRING_1_LE); 1817 break; 1818 case CHAR_PSTRING_2_BE: 1819 if (m->type != FILE_PSTRING) 1820 goto bad; 1821 SET_LENGTH(PSTRING_2_BE); 1822 break; 1823 case CHAR_PSTRING_2_LE: 1824 if (m->type != FILE_PSTRING) 1825 goto bad; 1826 SET_LENGTH(PSTRING_2_LE); 1827 break; 1828 case CHAR_PSTRING_4_BE: 1829 if (m->type != FILE_PSTRING) 1830 goto bad; 1831 SET_LENGTH(PSTRING_4_BE); 1832 break; 1833 case CHAR_PSTRING_4_LE: 1834 switch (m->type) { 1835 case FILE_PSTRING: 1836 case FILE_REGEX: 1837 break; 1838 default: 1839 goto bad; 1840 } 1841 SET_LENGTH(PSTRING_4_LE); 1842 break; 1843 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: 1844 if (m->type != FILE_PSTRING) 1845 goto bad; 1846 m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF; 1847 break; 1848 default: 1849 bad: 1850 if (ms->flags & MAGIC_CHECK) 1851 file_magwarn(ms, "string modifier `%c' " 1852 "invalid", *l); 1853 goto out; 1854 } 1855 /* allow multiple '/' for readability */ 1856 if (l[1] == '/' && !isspace(CAST(unsigned char, l[2]))) 1857 l++; 1858 } 1859 if (string_modifier_check(ms, m) == -1) 1860 goto out; 1861 *lp = l; 1862 return 0; 1863 out: 1864 *lp = l; 1865 return -1; 1866 } 1867 1868 /* 1869 * parse one line from magic file, put into magic[index++] if valid 1870 */ 1871 private int 1872 parse(struct magic_set *ms, struct magic_entry *me, const char *line, 1873 size_t lineno, int action) 1874 { 1875 #ifdef ENABLE_CONDITIONALS 1876 static uint32_t last_cont_level = 0; 1877 #endif 1878 size_t i; 1879 struct magic *m; 1880 const char *l = line; 1881 char *t; 1882 int op; 1883 uint32_t cont_level; 1884 int32_t diff; 1885 1886 cont_level = 0; 1887 1888 /* 1889 * Parse the offset. 1890 */ 1891 while (*l == '>') { 1892 ++l; /* step over */ 1893 cont_level++; 1894 } 1895 #ifdef ENABLE_CONDITIONALS 1896 if (cont_level == 0 || cont_level > last_cont_level) 1897 if (file_check_mem(ms, cont_level) == -1) 1898 return -1; 1899 last_cont_level = cont_level; 1900 #endif 1901 if (cont_level != 0) { 1902 if (me->mp == NULL) { 1903 file_magerror(ms, "No current entry for continuation"); 1904 return -1; 1905 } 1906 if (me->cont_count == 0) { 1907 file_magerror(ms, "Continuations present with 0 count"); 1908 return -1; 1909 } 1910 m = &me->mp[me->cont_count - 1]; 1911 diff = CAST(int32_t, cont_level) - CAST(int32_t, m->cont_level); 1912 if (diff > 1) 1913 file_magwarn(ms, "New continuation level %u is more " 1914 "than one larger than current level %u", cont_level, 1915 m->cont_level); 1916 if (me->cont_count == me->max_count) { 1917 struct magic *nm; 1918 size_t cnt = me->max_count + ALLOC_CHUNK; 1919 if ((nm = CAST(struct magic *, realloc(me->mp, 1920 sizeof(*nm) * cnt))) == NULL) { 1921 file_oomem(ms, sizeof(*nm) * cnt); 1922 return -1; 1923 } 1924 me->mp = nm; 1925 me->max_count = CAST(uint32_t, cnt); 1926 } 1927 m = &me->mp[me->cont_count++]; 1928 (void)memset(m, 0, sizeof(*m)); 1929 m->cont_level = cont_level; 1930 } else { 1931 static const size_t len = sizeof(*m) * ALLOC_CHUNK; 1932 if (me->mp != NULL) 1933 return 1; 1934 if ((m = CAST(struct magic *, malloc(len))) == NULL) { 1935 file_oomem(ms, len); 1936 return -1; 1937 } 1938 me->mp = m; 1939 me->max_count = ALLOC_CHUNK; 1940 (void)memset(m, 0, sizeof(*m)); 1941 m->factor_op = FILE_FACTOR_OP_NONE; 1942 m->cont_level = 0; 1943 me->cont_count = 1; 1944 } 1945 m->lineno = CAST(uint32_t, lineno); 1946 1947 if (*l == '&') { /* m->cont_level == 0 checked below. */ 1948 ++l; /* step over */ 1949 m->flag |= OFFADD; 1950 } 1951 if (*l == '(') { 1952 ++l; /* step over */ 1953 m->flag |= INDIR; 1954 if (m->flag & OFFADD) 1955 m->flag = (m->flag & ~OFFADD) | INDIROFFADD; 1956 1957 if (*l == '&') { /* m->cont_level == 0 checked below */ 1958 ++l; /* step over */ 1959 m->flag |= OFFADD; 1960 } 1961 } 1962 /* Indirect offsets are not valid at level 0. */ 1963 if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) { 1964 if (ms->flags & MAGIC_CHECK) 1965 file_magwarn(ms, "relative offset at level 0"); 1966 return -1; 1967 } 1968 1969 /* get offset, then skip over it */ 1970 if (*l == '-') { 1971 ++l; /* step over */ 1972 m->flag |= OFFNEGATIVE; 1973 } 1974 m->offset = CAST(int32_t, strtol(l, &t, 0)); 1975 if (l == t) { 1976 if (ms->flags & MAGIC_CHECK) 1977 file_magwarn(ms, "offset `%s' invalid", l); 1978 return -1; 1979 } 1980 1981 l = t; 1982 1983 if (m->flag & INDIR) { 1984 m->in_type = FILE_LONG; 1985 m->in_offset = 0; 1986 m->in_op = 0; 1987 /* 1988 * read [.,lbs][+-]nnnnn) 1989 */ 1990 if (*l == '.' || *l == ',') { 1991 if (*l == ',') 1992 m->in_op |= FILE_OPSIGNED; 1993 l++; 1994 switch (*l) { 1995 case 'l': 1996 m->in_type = FILE_LELONG; 1997 break; 1998 case 'L': 1999 m->in_type = FILE_BELONG; 2000 break; 2001 case 'm': 2002 m->in_type = FILE_MELONG; 2003 break; 2004 case 'h': 2005 case 's': 2006 m->in_type = FILE_LESHORT; 2007 break; 2008 case 'H': 2009 case 'S': 2010 m->in_type = FILE_BESHORT; 2011 break; 2012 case 'c': 2013 case 'b': 2014 case 'C': 2015 case 'B': 2016 m->in_type = FILE_BYTE; 2017 break; 2018 case 'e': 2019 case 'f': 2020 case 'g': 2021 m->in_type = FILE_LEDOUBLE; 2022 break; 2023 case 'E': 2024 case 'F': 2025 case 'G': 2026 m->in_type = FILE_BEDOUBLE; 2027 break; 2028 case 'i': 2029 m->in_type = FILE_LEID3; 2030 break; 2031 case 'I': 2032 m->in_type = FILE_BEID3; 2033 break; 2034 case 'q': 2035 m->in_type = FILE_LEQUAD; 2036 break; 2037 case 'Q': 2038 m->in_type = FILE_BEQUAD; 2039 break; 2040 default: 2041 if (ms->flags & MAGIC_CHECK) 2042 file_magwarn(ms, 2043 "indirect offset type `%c' invalid", 2044 *l); 2045 return -1; 2046 } 2047 l++; 2048 } 2049 2050 if (*l == '~') { 2051 m->in_op |= FILE_OPINVERSE; 2052 l++; 2053 } 2054 if ((op = get_op(*l)) != -1) { 2055 m->in_op |= op; 2056 l++; 2057 } 2058 if (*l == '(') { 2059 m->in_op |= FILE_OPINDIRECT; 2060 l++; 2061 } 2062 if (isdigit(CAST(unsigned char, *l)) || *l == '-') { 2063 m->in_offset = CAST(int32_t, strtol(l, &t, 0)); 2064 if (l == t) { 2065 if (ms->flags & MAGIC_CHECK) 2066 file_magwarn(ms, 2067 "in_offset `%s' invalid", l); 2068 return -1; 2069 } 2070 l = t; 2071 } 2072 if (*l++ != ')' || 2073 ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) { 2074 if (ms->flags & MAGIC_CHECK) 2075 file_magwarn(ms, 2076 "missing ')' in indirect offset"); 2077 return -1; 2078 } 2079 } 2080 EATAB; 2081 2082 #ifdef ENABLE_CONDITIONALS 2083 m->cond = get_cond(l, &l); 2084 if (check_cond(ms, m->cond, cont_level) == -1) 2085 return -1; 2086 2087 EATAB; 2088 #endif 2089 2090 /* 2091 * Parse the type. 2092 */ 2093 if (*l == 'u') { 2094 /* 2095 * Try it as a keyword type prefixed by "u"; match what 2096 * follows the "u". If that fails, try it as an SUS 2097 * integer type. 2098 */ 2099 m->type = get_type(type_tbl, l + 1, &l); 2100 if (m->type == FILE_INVALID) { 2101 /* 2102 * Not a keyword type; parse it as an SUS type, 2103 * 'u' possibly followed by a number or C/S/L. 2104 */ 2105 m->type = get_standard_integer_type(l, &l); 2106 } 2107 /* It's unsigned. */ 2108 if (m->type != FILE_INVALID) 2109 m->flag |= UNSIGNED; 2110 } else { 2111 /* 2112 * Try it as a keyword type. If that fails, try it as 2113 * an SUS integer type if it begins with "d" or as an 2114 * SUS string type if it begins with "s". In any case, 2115 * it's not unsigned. 2116 */ 2117 m->type = get_type(type_tbl, l, &l); 2118 if (m->type == FILE_INVALID) { 2119 /* 2120 * Not a keyword type; parse it as an SUS type, 2121 * either 'd' possibly followed by a number or 2122 * C/S/L, or just 's'. 2123 */ 2124 if (*l == 'd') 2125 m->type = get_standard_integer_type(l, &l); 2126 else if (*l == 's' 2127 && !isalpha(CAST(unsigned char, l[1]))) { 2128 m->type = FILE_STRING; 2129 ++l; 2130 } 2131 } 2132 } 2133 2134 if (m->type == FILE_INVALID) { 2135 /* Not found - try it as a special keyword. */ 2136 m->type = get_type(special_tbl, l, &l); 2137 } 2138 2139 if (m->type == FILE_INVALID) { 2140 if (ms->flags & MAGIC_CHECK) 2141 file_magwarn(ms, "type `%s' invalid", l); 2142 return -1; 2143 } 2144 2145 if (m->type == FILE_NAME && cont_level != 0) { 2146 if (ms->flags & MAGIC_CHECK) 2147 file_magwarn(ms, "`name%s' entries can only be " 2148 "declared at top level", l); 2149 return -1; 2150 } 2151 2152 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 2153 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ 2154 2155 m->mask_op = 0; 2156 if (*l == '~') { 2157 if (!IS_STRING(m->type)) 2158 m->mask_op |= FILE_OPINVERSE; 2159 else if (ms->flags & MAGIC_CHECK) 2160 file_magwarn(ms, "'~' invalid for string types"); 2161 ++l; 2162 } 2163 m->str_range = 0; 2164 m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; 2165 if ((op = get_op(*l)) != -1) { 2166 if (IS_STRING(m->type)) { 2167 int r; 2168 2169 if (op != FILE_OPDIVIDE) { 2170 if (ms->flags & MAGIC_CHECK) 2171 file_magwarn(ms, 2172 "invalid string/indirect op: " 2173 "`%c'", *t); 2174 return -1; 2175 } 2176 2177 if (m->type == FILE_INDIRECT) 2178 r = parse_indirect_modifier(ms, m, &l); 2179 else 2180 r = parse_string_modifier(ms, m, &l); 2181 if (r == -1) 2182 return -1; 2183 } else 2184 parse_op_modifier(ms, m, &l, op); 2185 } 2186 2187 /* 2188 * We used to set mask to all 1's here, instead let's just not do 2189 * anything if mask = 0 (unless you have a better idea) 2190 */ 2191 EATAB; 2192 2193 switch (*l) { 2194 case '>': 2195 case '<': 2196 m->reln = *l; 2197 ++l; 2198 if (*l == '=') { 2199 if (ms->flags & MAGIC_CHECK) { 2200 file_magwarn(ms, "%c= not supported", 2201 m->reln); 2202 return -1; 2203 } 2204 ++l; 2205 } 2206 break; 2207 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 2208 case '&': 2209 case '^': 2210 case '=': 2211 m->reln = *l; 2212 ++l; 2213 if (*l == '=') { 2214 /* HP compat: ignore &= etc. */ 2215 ++l; 2216 } 2217 break; 2218 case '!': 2219 m->reln = *l; 2220 ++l; 2221 break; 2222 default: 2223 m->reln = '='; /* the default relation */ 2224 if (*l == 'x' && ((isascii(CAST(unsigned char, l[1])) && 2225 isspace(CAST(unsigned char, l[1]))) || !l[1])) { 2226 m->reln = *l; 2227 ++l; 2228 } 2229 break; 2230 } 2231 /* 2232 * Grab the value part, except for an 'x' reln. 2233 */ 2234 if (m->reln != 'x' && getvalue(ms, m, &l, action)) 2235 return -1; 2236 2237 /* 2238 * TODO finish this macro and start using it! 2239 * #define offsetcheck {if (offset > ms->bytes_max -1) 2240 * magwarn("offset too big"); } 2241 */ 2242 2243 /* 2244 * Now get last part - the description 2245 */ 2246 EATAB; 2247 if (l[0] == '\b') { 2248 ++l; 2249 m->flag |= NOSPACE; 2250 } else if ((l[0] == '\\') && (l[1] == 'b')) { 2251 ++l; 2252 ++l; 2253 m->flag |= NOSPACE; 2254 } 2255 for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) 2256 continue; 2257 if (i == sizeof(m->desc)) { 2258 m->desc[sizeof(m->desc) - 1] = '\0'; 2259 if (ms->flags & MAGIC_CHECK) 2260 file_magwarn(ms, "description `%s' truncated", m->desc); 2261 } 2262 2263 /* 2264 * We only do this check while compiling, or if any of the magic 2265 * files were not compiled. 2266 */ 2267 if (ms->flags & MAGIC_CHECK) { 2268 if (check_format(ms, m) == -1) 2269 return -1; 2270 } 2271 #ifndef COMPILE_ONLY 2272 if (action == FILE_CHECK) { 2273 file_mdump(m); 2274 } 2275 #endif 2276 m->mimetype[0] = '\0'; /* initialise MIME type to none */ 2277 return 0; 2278 } 2279 2280 /* 2281 * parse a STRENGTH annotation line from magic file, put into magic[index - 1] 2282 * if valid 2283 */ 2284 private int 2285 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line, 2286 size_t len __attribute__((__unused__))) 2287 { 2288 const char *l = line; 2289 char *el; 2290 unsigned long factor; 2291 struct magic *m = &me->mp[0]; 2292 2293 if (m->factor_op != FILE_FACTOR_OP_NONE) { 2294 file_magwarn(ms, 2295 "Current entry already has a strength type: %c %d", 2296 m->factor_op, m->factor); 2297 return -1; 2298 } 2299 if (m->type == FILE_NAME) { 2300 file_magwarn(ms, "%s: Strength setting is not supported in " 2301 "\"name\" magic entries", m->value.s); 2302 return -1; 2303 } 2304 EATAB; 2305 switch (*l) { 2306 case FILE_FACTOR_OP_NONE: 2307 case FILE_FACTOR_OP_PLUS: 2308 case FILE_FACTOR_OP_MINUS: 2309 case FILE_FACTOR_OP_TIMES: 2310 case FILE_FACTOR_OP_DIV: 2311 m->factor_op = *l++; 2312 break; 2313 default: 2314 file_magwarn(ms, "Unknown factor op `%c'", *l); 2315 return -1; 2316 } 2317 EATAB; 2318 factor = strtoul(l, &el, 0); 2319 if (factor > 255) { 2320 file_magwarn(ms, "Too large factor `%lu'", factor); 2321 goto out; 2322 } 2323 if (*el && !isspace(CAST(unsigned char, *el))) { 2324 file_magwarn(ms, "Bad factor `%s'", l); 2325 goto out; 2326 } 2327 m->factor = CAST(uint8_t, factor); 2328 if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) { 2329 file_magwarn(ms, "Cannot have factor op `%c' and factor %u", 2330 m->factor_op, m->factor); 2331 goto out; 2332 } 2333 return 0; 2334 out: 2335 m->factor_op = FILE_FACTOR_OP_NONE; 2336 m->factor = 0; 2337 return -1; 2338 } 2339 2340 private int 2341 goodchar(unsigned char x, const char *extra) 2342 { 2343 return (isascii(x) && isalnum(x)) || strchr(extra, x); 2344 } 2345 2346 private int 2347 parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line, 2348 size_t llen, off_t off, size_t len, const char *name, const char *extra, 2349 int nt) 2350 { 2351 size_t i; 2352 const char *l = line; 2353 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 2354 char *buf = CAST(char *, CAST(void *, m)) + off; 2355 2356 if (buf[0] != '\0') { 2357 len = nt ? strlen(buf) : len; 2358 file_magwarn(ms, "Current entry already has a %s type " 2359 "`%.*s', new type `%s'", name, CAST(int, len), buf, l); 2360 return -1; 2361 } 2362 2363 if (*m->desc == '\0') { 2364 file_magwarn(ms, "Current entry does not yet have a " 2365 "description for adding a %s type", name); 2366 return -1; 2367 } 2368 2369 EATAB; 2370 for (i = 0; *l && i < llen && i < len && goodchar(*l, extra); 2371 buf[i++] = *l++) 2372 continue; 2373 2374 if (i == len && *l) { 2375 if (nt) 2376 buf[len - 1] = '\0'; 2377 if (ms->flags & MAGIC_CHECK) 2378 file_magwarn(ms, "%s type `%s' truncated %" 2379 SIZE_T_FORMAT "u", name, line, i); 2380 } else { 2381 if (!isspace(CAST(unsigned char, *l)) && !goodchar(*l, extra)) 2382 file_magwarn(ms, "%s type `%s' has bad char '%c'", 2383 name, line, *l); 2384 if (nt) 2385 buf[i] = '\0'; 2386 } 2387 2388 if (i > 0) 2389 return 0; 2390 2391 file_magerror(ms, "Bad magic entry '%s'", line); 2392 return -1; 2393 } 2394 2395 /* 2396 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into 2397 * magic[index - 1] 2398 */ 2399 private int 2400 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line, 2401 size_t len) 2402 { 2403 struct magic *m = &me->mp[0]; 2404 2405 return parse_extra(ms, me, line, len, 2406 CAST(off_t, offsetof(struct magic, apple)), 2407 sizeof(m->apple), "APPLE", "!+-./?", 0); 2408 } 2409 2410 /* 2411 * Parse a comma-separated list of extensions 2412 */ 2413 private int 2414 parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line, 2415 size_t len) 2416 { 2417 struct magic *m = &me->mp[0]; 2418 2419 return parse_extra(ms, me, line, len, 2420 CAST(off_t, offsetof(struct magic, ext)), 2421 sizeof(m->ext), "EXTENSION", ",!+-/@?_$", 0); 2422 } 2423 2424 /* 2425 * parse a MIME annotation line from magic file, put into magic[index - 1] 2426 * if valid 2427 */ 2428 private int 2429 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line, 2430 size_t len) 2431 { 2432 struct magic *m = &me->mp[0]; 2433 2434 return parse_extra(ms, me, line, len, 2435 CAST(off_t, offsetof(struct magic, mimetype)), 2436 sizeof(m->mimetype), "MIME", "+-/.$?:{}", 1); 2437 } 2438 2439 private int 2440 check_format_type(const char *ptr, int type, const char **estr) 2441 { 2442 int quad = 0, h; 2443 size_t len, cnt; 2444 if (*ptr == '\0') { 2445 /* Missing format string; bad */ 2446 *estr = "missing format spec"; 2447 return -1; 2448 } 2449 2450 switch (file_formats[type]) { 2451 case FILE_FMT_QUAD: 2452 quad = 1; 2453 /*FALLTHROUGH*/ 2454 case FILE_FMT_NUM: 2455 if (quad == 0) { 2456 switch (type) { 2457 case FILE_BYTE: 2458 h = 2; 2459 break; 2460 case FILE_SHORT: 2461 case FILE_BESHORT: 2462 case FILE_LESHORT: 2463 h = 1; 2464 break; 2465 case FILE_LONG: 2466 case FILE_BELONG: 2467 case FILE_LELONG: 2468 case FILE_MELONG: 2469 case FILE_LEID3: 2470 case FILE_BEID3: 2471 case FILE_INDIRECT: 2472 h = 0; 2473 break; 2474 default: 2475 abort(); 2476 } 2477 } else 2478 h = 0; 2479 if (*ptr == '-') 2480 ptr++; 2481 if (*ptr == '.') 2482 ptr++; 2483 if (*ptr == '#') 2484 ptr++; 2485 #define CHECKLEN() do { \ 2486 for (len = cnt = 0; isdigit(CAST(unsigned char, *ptr)); ptr++, cnt++) \ 2487 len = len * 10 + (*ptr - '0'); \ 2488 if (cnt > 5 || len > 1024) \ 2489 goto toolong; \ 2490 } while (/*CONSTCOND*/0) 2491 2492 CHECKLEN(); 2493 if (*ptr == '.') 2494 ptr++; 2495 CHECKLEN(); 2496 if (quad) { 2497 if (*ptr++ != 'l') 2498 goto invalid; 2499 if (*ptr++ != 'l') 2500 goto invalid; 2501 } 2502 2503 switch (*ptr++) { 2504 #ifdef STRICT_FORMAT /* "long" formats are int formats for us */ 2505 /* so don't accept the 'l' modifier */ 2506 case 'l': 2507 switch (*ptr++) { 2508 case 'i': 2509 case 'd': 2510 case 'u': 2511 case 'o': 2512 case 'x': 2513 case 'X': 2514 if (h == 0) 2515 return 0; 2516 /*FALLTHROUGH*/ 2517 default: 2518 goto invalid; 2519 } 2520 2521 /* 2522 * Don't accept h and hh modifiers. They make writing 2523 * magic entries more complicated, for very little benefit 2524 */ 2525 case 'h': 2526 if (h-- <= 0) 2527 goto invalid; 2528 switch (*ptr++) { 2529 case 'h': 2530 if (h-- <= 0) 2531 goto invalid; 2532 switch (*ptr++) { 2533 case 'i': 2534 case 'd': 2535 case 'u': 2536 case 'o': 2537 case 'x': 2538 case 'X': 2539 return 0; 2540 default: 2541 goto invalid; 2542 } 2543 case 'i': 2544 case 'd': 2545 case 'u': 2546 case 'o': 2547 case 'x': 2548 case 'X': 2549 if (h == 0) 2550 return 0; 2551 /*FALLTHROUGH*/ 2552 default: 2553 goto invalid; 2554 } 2555 #endif 2556 case 'c': 2557 if (h == 2) 2558 return 0; 2559 goto invalid; 2560 case 'i': 2561 case 'd': 2562 case 'u': 2563 case 'o': 2564 case 'x': 2565 case 'X': 2566 #ifdef STRICT_FORMAT 2567 if (h == 0) 2568 return 0; 2569 /*FALLTHROUGH*/ 2570 #else 2571 return 0; 2572 #endif 2573 default: 2574 goto invalid; 2575 } 2576 2577 case FILE_FMT_FLOAT: 2578 case FILE_FMT_DOUBLE: 2579 if (*ptr == '-') 2580 ptr++; 2581 if (*ptr == '.') 2582 ptr++; 2583 CHECKLEN(); 2584 if (*ptr == '.') 2585 ptr++; 2586 CHECKLEN(); 2587 switch (*ptr++) { 2588 case 'e': 2589 case 'E': 2590 case 'f': 2591 case 'F': 2592 case 'g': 2593 case 'G': 2594 return 0; 2595 2596 default: 2597 goto invalid; 2598 } 2599 2600 2601 case FILE_FMT_STR: 2602 if (*ptr == '-') 2603 ptr++; 2604 while (isdigit(CAST(unsigned char, *ptr))) 2605 ptr++; 2606 if (*ptr == '.') { 2607 ptr++; 2608 while (isdigit(CAST(unsigned char , *ptr))) 2609 ptr++; 2610 } 2611 2612 switch (*ptr++) { 2613 case 's': 2614 return 0; 2615 default: 2616 goto invalid; 2617 } 2618 2619 default: 2620 /* internal error */ 2621 abort(); 2622 } 2623 invalid: 2624 *estr = "not valid"; 2625 toolong: 2626 *estr = "too long"; 2627 return -1; 2628 } 2629 2630 /* 2631 * Check that the optional printf format in description matches 2632 * the type of the magic. 2633 */ 2634 private int 2635 check_format(struct magic_set *ms, struct magic *m) 2636 { 2637 char *ptr; 2638 const char *estr; 2639 2640 for (ptr = m->desc; *ptr; ptr++) 2641 if (*ptr == '%') 2642 break; 2643 if (*ptr == '\0') { 2644 /* No format string; ok */ 2645 return 1; 2646 } 2647 2648 assert(file_nformats == file_nnames); 2649 2650 if (m->type >= file_nformats) { 2651 file_magwarn(ms, "Internal error inconsistency between " 2652 "m->type and format strings"); 2653 return -1; 2654 } 2655 if (file_formats[m->type] == FILE_FMT_NONE) { 2656 file_magwarn(ms, "No format string for `%s' with description " 2657 "`%s'", m->desc, file_names[m->type]); 2658 return -1; 2659 } 2660 2661 ptr++; 2662 if (check_format_type(ptr, m->type, &estr) == -1) { 2663 /* 2664 * TODO: this error message is unhelpful if the format 2665 * string is not one character long 2666 */ 2667 file_magwarn(ms, "Printf format is %s for type " 2668 "`%s' in description `%s'", estr, 2669 file_names[m->type], m->desc); 2670 return -1; 2671 } 2672 2673 for (; *ptr; ptr++) { 2674 if (*ptr == '%') { 2675 file_magwarn(ms, 2676 "Too many format strings (should have at most one) " 2677 "for `%s' with description `%s'", 2678 file_names[m->type], m->desc); 2679 return -1; 2680 } 2681 } 2682 return 0; 2683 } 2684 2685 /* 2686 * Read a numeric value from a pointer, into the value union of a magic 2687 * pointer, according to the magic type. Update the string pointer to point 2688 * just after the number read. Return 0 for success, non-zero for failure. 2689 */ 2690 private int 2691 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) 2692 { 2693 char *ep; 2694 uint64_t ull; 2695 2696 switch (m->type) { 2697 case FILE_BESTRING16: 2698 case FILE_LESTRING16: 2699 case FILE_STRING: 2700 case FILE_PSTRING: 2701 case FILE_REGEX: 2702 case FILE_SEARCH: 2703 case FILE_NAME: 2704 case FILE_USE: 2705 case FILE_DER: 2706 *p = getstr(ms, m, *p, action == FILE_COMPILE); 2707 if (*p == NULL) { 2708 if (ms->flags & MAGIC_CHECK) 2709 file_magwarn(ms, "cannot get string from `%s'", 2710 m->value.s); 2711 return -1; 2712 } 2713 if (m->type == FILE_REGEX) { 2714 file_regex_t rx; 2715 int rc = file_regcomp(&rx, m->value.s, REG_EXTENDED); 2716 if (rc) { 2717 if (ms->flags & MAGIC_CHECK) 2718 file_regerror(&rx, rc, ms); 2719 } 2720 file_regfree(&rx); 2721 return rc ? -1 : 0; 2722 } 2723 return 0; 2724 default: 2725 if (m->reln == 'x') 2726 return 0; 2727 break; 2728 } 2729 2730 switch (m->type) { 2731 case FILE_FLOAT: 2732 case FILE_BEFLOAT: 2733 case FILE_LEFLOAT: 2734 errno = 0; 2735 #ifdef HAVE_STRTOF 2736 m->value.f = strtof(*p, &ep); 2737 #else 2738 m->value.f = (float)strtod(*p, &ep); 2739 #endif 2740 if (errno == 0) 2741 *p = ep; 2742 return 0; 2743 case FILE_DOUBLE: 2744 case FILE_BEDOUBLE: 2745 case FILE_LEDOUBLE: 2746 errno = 0; 2747 m->value.d = strtod(*p, &ep); 2748 if (errno == 0) 2749 *p = ep; 2750 return 0; 2751 case FILE_GUID: 2752 if (file_parse_guid(*p, m->value.guid) == -1) 2753 return -1; 2754 *p += FILE_GUID_SIZE - 1; 2755 return 0; 2756 default: 2757 errno = 0; 2758 ull = CAST(uint64_t, strtoull(*p, &ep, 0)); 2759 m->value.q = file_signextend(ms, m, ull); 2760 if (*p == ep) { 2761 file_magwarn(ms, "Unparsable number `%s'", *p); 2762 } else { 2763 size_t ts = typesize(m->type); 2764 uint64_t x; 2765 const char *q; 2766 2767 if (ts == FILE_BADSIZE) { 2768 file_magwarn(ms, 2769 "Expected numeric type got `%s'", 2770 type_tbl[m->type].name); 2771 } 2772 for (q = *p; isspace(CAST(unsigned char, *q)); q++) 2773 continue; 2774 if (*q == '-') 2775 ull = -CAST(int64_t, ull); 2776 switch (ts) { 2777 case 1: 2778 x = CAST(uint64_t, ull & ~0xffULL); 2779 break; 2780 case 2: 2781 x = CAST(uint64_t, ull & ~0xffffULL); 2782 break; 2783 case 4: 2784 x = CAST(uint64_t, ull & ~0xffffffffULL); 2785 break; 2786 case 8: 2787 x = 0; 2788 break; 2789 default: 2790 abort(); 2791 } 2792 if (x) { 2793 file_magwarn(ms, "Overflow for numeric" 2794 " type `%s' value %#" PRIx64, 2795 type_tbl[m->type].name, ull); 2796 } 2797 } 2798 if (errno == 0) { 2799 *p = ep; 2800 eatsize(p); 2801 } 2802 return 0; 2803 } 2804 } 2805 2806 /* 2807 * Convert a string containing C character escapes. Stop at an unescaped 2808 * space or tab. 2809 * Copy the converted version to "m->value.s", and the length in m->vallen. 2810 * Return updated scan pointer as function result. Warn if set. 2811 */ 2812 private const char * 2813 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn) 2814 { 2815 const char *origs = s; 2816 char *p = m->value.s; 2817 size_t plen = sizeof(m->value.s); 2818 char *origp = p; 2819 char *pmax = p + plen - 1; 2820 int c; 2821 int val; 2822 2823 while ((c = *s++) != '\0') { 2824 if (isspace(CAST(unsigned char, c))) 2825 break; 2826 if (p >= pmax) { 2827 file_error(ms, 0, "string too long: `%s'", origs); 2828 return NULL; 2829 } 2830 if (c == '\\') { 2831 switch(c = *s++) { 2832 2833 case '\0': 2834 if (warn) 2835 file_magwarn(ms, "incomplete escape"); 2836 s--; 2837 goto out; 2838 2839 case '\t': 2840 if (warn) { 2841 file_magwarn(ms, 2842 "escaped tab found, use \\t instead"); 2843 warn = 0; /* already did */ 2844 } 2845 /*FALLTHROUGH*/ 2846 default: 2847 if (warn) { 2848 if (isprint(CAST(unsigned char, c))) { 2849 /* Allow escaping of 2850 * ``relations'' */ 2851 if (strchr("<>&^=!", c) == NULL 2852 && (m->type != FILE_REGEX || 2853 strchr("[]().*?^$|{}", c) 2854 == NULL)) { 2855 file_magwarn(ms, "no " 2856 "need to escape " 2857 "`%c'", c); 2858 } 2859 } else { 2860 file_magwarn(ms, 2861 "unknown escape sequence: " 2862 "\\%03o", c); 2863 } 2864 } 2865 /*FALLTHROUGH*/ 2866 /* space, perhaps force people to use \040? */ 2867 case ' ': 2868 #if 0 2869 /* 2870 * Other things people escape, but shouldn't need to, 2871 * so we disallow them 2872 */ 2873 case '\'': 2874 case '"': 2875 case '?': 2876 #endif 2877 /* Relations */ 2878 case '>': 2879 case '<': 2880 case '&': 2881 case '^': 2882 case '=': 2883 case '!': 2884 /* and baskslash itself */ 2885 case '\\': 2886 *p++ = CAST(char, c); 2887 break; 2888 2889 case 'a': 2890 *p++ = '\a'; 2891 break; 2892 2893 case 'b': 2894 *p++ = '\b'; 2895 break; 2896 2897 case 'f': 2898 *p++ = '\f'; 2899 break; 2900 2901 case 'n': 2902 *p++ = '\n'; 2903 break; 2904 2905 case 'r': 2906 *p++ = '\r'; 2907 break; 2908 2909 case 't': 2910 *p++ = '\t'; 2911 break; 2912 2913 case 'v': 2914 *p++ = '\v'; 2915 break; 2916 2917 /* \ and up to 3 octal digits */ 2918 case '0': 2919 case '1': 2920 case '2': 2921 case '3': 2922 case '4': 2923 case '5': 2924 case '6': 2925 case '7': 2926 val = c - '0'; 2927 c = *s++; /* try for 2 */ 2928 if (c >= '0' && c <= '7') { 2929 val = (val << 3) | (c - '0'); 2930 c = *s++; /* try for 3 */ 2931 if (c >= '0' && c <= '7') 2932 val = (val << 3) | (c-'0'); 2933 else 2934 --s; 2935 } 2936 else 2937 --s; 2938 *p++ = CAST(char, val); 2939 break; 2940 2941 /* \x and up to 2 hex digits */ 2942 case 'x': 2943 val = 'x'; /* Default if no digits */ 2944 c = hextoint(*s++); /* Get next char */ 2945 if (c >= 0) { 2946 val = c; 2947 c = hextoint(*s++); 2948 if (c >= 0) 2949 val = (val << 4) + c; 2950 else 2951 --s; 2952 } else 2953 --s; 2954 *p++ = CAST(char, val); 2955 break; 2956 } 2957 } else 2958 *p++ = CAST(char, c); 2959 } 2960 --s; 2961 out: 2962 *p = '\0'; 2963 m->vallen = CAST(unsigned char, (p - origp)); 2964 if (m->type == FILE_PSTRING) { 2965 size_t l = file_pstring_length_size(ms, m); 2966 if (l == FILE_BADSIZE) 2967 return NULL; 2968 m->vallen += CAST(unsigned char, l); 2969 } 2970 return s; 2971 } 2972 2973 2974 /* Single hex char to int; -1 if not a hex char. */ 2975 private int 2976 hextoint(int c) 2977 { 2978 if (!isascii(CAST(unsigned char, c))) 2979 return -1; 2980 if (isdigit(CAST(unsigned char, c))) 2981 return c - '0'; 2982 if ((c >= 'a') && (c <= 'f')) 2983 return c + 10 - 'a'; 2984 if (( c>= 'A') && (c <= 'F')) 2985 return c + 10 - 'A'; 2986 return -1; 2987 } 2988 2989 2990 /* 2991 * Print a string containing C character escapes. 2992 */ 2993 protected void 2994 file_showstr(FILE *fp, const char *s, size_t len) 2995 { 2996 char c; 2997 2998 for (;;) { 2999 if (len == FILE_BADSIZE) { 3000 c = *s++; 3001 if (c == '\0') 3002 break; 3003 } 3004 else { 3005 if (len-- == 0) 3006 break; 3007 c = *s++; 3008 } 3009 if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 3010 (void) fputc(c, fp); 3011 else { 3012 (void) fputc('\\', fp); 3013 switch (c) { 3014 case '\a': 3015 (void) fputc('a', fp); 3016 break; 3017 3018 case '\b': 3019 (void) fputc('b', fp); 3020 break; 3021 3022 case '\f': 3023 (void) fputc('f', fp); 3024 break; 3025 3026 case '\n': 3027 (void) fputc('n', fp); 3028 break; 3029 3030 case '\r': 3031 (void) fputc('r', fp); 3032 break; 3033 3034 case '\t': 3035 (void) fputc('t', fp); 3036 break; 3037 3038 case '\v': 3039 (void) fputc('v', fp); 3040 break; 3041 3042 default: 3043 (void) fprintf(fp, "%.3o", c & 0377); 3044 break; 3045 } 3046 } 3047 } 3048 } 3049 3050 /* 3051 * eatsize(): Eat the size spec from a number [eg. 10UL] 3052 */ 3053 private void 3054 eatsize(const char **p) 3055 { 3056 const char *l = *p; 3057 3058 if (LOWCASE(*l) == 'u') 3059 l++; 3060 3061 switch (LOWCASE(*l)) { 3062 case 'l': /* long */ 3063 case 's': /* short */ 3064 case 'h': /* short */ 3065 case 'b': /* char/byte */ 3066 case 'c': /* char/byte */ 3067 l++; 3068 /*FALLTHROUGH*/ 3069 default: 3070 break; 3071 } 3072 3073 *p = l; 3074 } 3075 3076 /* 3077 * handle a buffer containing a compiled file. 3078 */ 3079 private struct magic_map * 3080 apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len) 3081 { 3082 struct magic_map *map; 3083 3084 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) { 3085 file_oomem(ms, sizeof(*map)); 3086 return NULL; 3087 } 3088 map->len = len; 3089 map->p = buf; 3090 map->type = MAP_TYPE_USER; 3091 if (check_buffer(ms, map, "buffer") != 0) { 3092 apprentice_unmap(map); 3093 return NULL; 3094 } 3095 return map; 3096 } 3097 3098 /* 3099 * handle a compiled file. 3100 */ 3101 3102 private struct magic_map * 3103 apprentice_map(struct magic_set *ms, const char *fn) 3104 { 3105 int fd; 3106 struct stat st; 3107 char *dbname = NULL; 3108 struct magic_map *map; 3109 struct magic_map *rv = NULL; 3110 3111 fd = -1; 3112 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) { 3113 file_oomem(ms, sizeof(*map)); 3114 goto error; 3115 } 3116 map->type = MAP_TYPE_USER; /* unspecified */ 3117 3118 dbname = mkdbname(ms, fn, 0); 3119 if (dbname == NULL) 3120 goto error; 3121 3122 if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) 3123 goto error; 3124 3125 if (fstat(fd, &st) == -1) { 3126 file_error(ms, errno, "cannot stat `%s'", dbname); 3127 goto error; 3128 } 3129 if (st.st_size < 8 || st.st_size > maxoff_t()) { 3130 file_error(ms, 0, "file `%s' is too %s", dbname, 3131 st.st_size < 8 ? "small" : "large"); 3132 goto error; 3133 } 3134 3135 map->len = CAST(size_t, st.st_size); 3136 #ifdef QUICK 3137 map->type = MAP_TYPE_MMAP; 3138 if ((map->p = mmap(0, CAST(size_t, st.st_size), PROT_READ|PROT_WRITE, 3139 MAP_PRIVATE|MAP_FILE, fd, CAST(off_t, 0))) == MAP_FAILED) { 3140 file_error(ms, errno, "cannot map `%s'", dbname); 3141 goto error; 3142 } 3143 #else 3144 map->type = MAP_TYPE_MALLOC; 3145 if ((map->p = CAST(void *, malloc(map->len))) == NULL) { 3146 file_oomem(ms, map->len); 3147 goto error; 3148 } 3149 if (read(fd, map->p, map->len) != (ssize_t)map->len) { 3150 file_badread(ms); 3151 goto error; 3152 } 3153 #endif 3154 (void)close(fd); 3155 fd = -1; 3156 3157 if (check_buffer(ms, map, dbname) != 0) { 3158 goto error; 3159 } 3160 #ifdef QUICK 3161 if (mprotect(map->p, CAST(size_t, st.st_size), PROT_READ) == -1) { 3162 file_error(ms, errno, "cannot mprotect `%s'", dbname); 3163 goto error; 3164 } 3165 #endif 3166 3167 free(dbname); 3168 return map; 3169 3170 error: 3171 if (fd != -1) 3172 (void)close(fd); 3173 apprentice_unmap(map); 3174 free(dbname); 3175 return rv; 3176 } 3177 3178 private int 3179 check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname) 3180 { 3181 uint32_t *ptr; 3182 uint32_t entries, nentries; 3183 uint32_t version; 3184 int i, needsbyteswap; 3185 3186 ptr = CAST(uint32_t *, map->p); 3187 if (*ptr != MAGICNO) { 3188 if (swap4(*ptr) != MAGICNO) { 3189 file_error(ms, 0, "bad magic in `%s'", dbname); 3190 return -1; 3191 } 3192 needsbyteswap = 1; 3193 } else 3194 needsbyteswap = 0; 3195 if (needsbyteswap) 3196 version = swap4(ptr[1]); 3197 else 3198 version = ptr[1]; 3199 if (version != VERSIONNO) { 3200 file_error(ms, 0, "File %s supports only version %d magic " 3201 "files. `%s' is version %d", VERSION, 3202 VERSIONNO, dbname, version); 3203 return -1; 3204 } 3205 entries = CAST(uint32_t, map->len / sizeof(struct magic)); 3206 if ((entries * sizeof(struct magic)) != map->len) { 3207 file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not " 3208 "a multiple of %" SIZE_T_FORMAT "u", 3209 dbname, map->len, sizeof(struct magic)); 3210 return -1; 3211 } 3212 map->magic[0] = CAST(struct magic *, map->p) + 1; 3213 nentries = 0; 3214 for (i = 0; i < MAGIC_SETS; i++) { 3215 if (needsbyteswap) 3216 map->nmagic[i] = swap4(ptr[i + 2]); 3217 else 3218 map->nmagic[i] = ptr[i + 2]; 3219 if (i != MAGIC_SETS - 1) 3220 map->magic[i + 1] = map->magic[i] + map->nmagic[i]; 3221 nentries += map->nmagic[i]; 3222 } 3223 if (entries != nentries + 1) { 3224 file_error(ms, 0, "Inconsistent entries in `%s' %u != %u", 3225 dbname, entries, nentries + 1); 3226 return -1; 3227 } 3228 if (needsbyteswap) 3229 for (i = 0; i < MAGIC_SETS; i++) 3230 byteswap(map->magic[i], map->nmagic[i]); 3231 return 0; 3232 } 3233 3234 /* 3235 * handle an mmaped file. 3236 */ 3237 private int 3238 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn) 3239 { 3240 static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS; 3241 static const size_t m = sizeof(**map->magic); 3242 int fd = -1; 3243 size_t len; 3244 char *dbname; 3245 int rv = -1; 3246 uint32_t i; 3247 union { 3248 struct magic m; 3249 uint32_t h[2 + MAGIC_SETS]; 3250 } hdr; 3251 3252 dbname = mkdbname(ms, fn, 1); 3253 3254 if (dbname == NULL) 3255 goto out; 3256 3257 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) 3258 { 3259 file_error(ms, errno, "cannot open `%s'", dbname); 3260 goto out; 3261 } 3262 memset(&hdr, 0, sizeof(hdr)); 3263 hdr.h[0] = MAGICNO; 3264 hdr.h[1] = VERSIONNO; 3265 memcpy(hdr.h + 2, map->nmagic, nm); 3266 3267 if (write(fd, &hdr, sizeof(hdr)) != CAST(ssize_t, sizeof(hdr))) { 3268 file_error(ms, errno, "error writing `%s'", dbname); 3269 goto out2; 3270 } 3271 3272 for (i = 0; i < MAGIC_SETS; i++) { 3273 len = m * map->nmagic[i]; 3274 if (write(fd, map->magic[i], len) != CAST(ssize_t, len)) { 3275 file_error(ms, errno, "error writing `%s'", dbname); 3276 goto out2; 3277 } 3278 } 3279 3280 rv = 0; 3281 out2: 3282 if (fd != -1) 3283 (void)close(fd); 3284 out: 3285 apprentice_unmap(map); 3286 free(dbname); 3287 return rv; 3288 } 3289 3290 private const char ext[] = ".mgc"; 3291 /* 3292 * make a dbname 3293 */ 3294 private char * 3295 mkdbname(struct magic_set *ms, const char *fn, int strip) 3296 { 3297 const char *p, *q; 3298 char *buf; 3299 3300 if (strip) { 3301 if ((p = strrchr(fn, '/')) != NULL) 3302 fn = ++p; 3303 } 3304 3305 for (q = fn; *q; q++) 3306 continue; 3307 /* Look for .mgc */ 3308 for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--) 3309 if (*p != *q) 3310 break; 3311 3312 /* Did not find .mgc, restore q */ 3313 if (p >= ext) 3314 while (*q) 3315 q++; 3316 3317 q++; 3318 /* Compatibility with old code that looked in .mime */ 3319 if (ms->flags & MAGIC_MIME) { 3320 if (asprintf(&buf, "%.*s.mime%s", CAST(int, q - fn), fn, ext) 3321 < 0) 3322 return NULL; 3323 if (access(buf, R_OK) != -1) { 3324 ms->flags &= MAGIC_MIME_TYPE; 3325 return buf; 3326 } 3327 free(buf); 3328 } 3329 if (asprintf(&buf, "%.*s%s", CAST(int, q - fn), fn, ext) < 0) 3330 return NULL; 3331 3332 /* Compatibility with old code that looked in .mime */ 3333 if (strstr(fn, ".mime") != NULL) 3334 ms->flags &= MAGIC_MIME_TYPE; 3335 return buf; 3336 } 3337 3338 /* 3339 * Byteswap an mmap'ed file if needed 3340 */ 3341 private void 3342 byteswap(struct magic *magic, uint32_t nmagic) 3343 { 3344 uint32_t i; 3345 for (i = 0; i < nmagic; i++) 3346 bs1(&magic[i]); 3347 } 3348 3349 /* 3350 * swap a short 3351 */ 3352 private uint16_t 3353 swap2(uint16_t sv) 3354 { 3355 uint16_t rv; 3356 uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv)); 3357 uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv)); 3358 d[0] = s[1]; 3359 d[1] = s[0]; 3360 return rv; 3361 } 3362 3363 /* 3364 * swap an int 3365 */ 3366 private uint32_t 3367 swap4(uint32_t sv) 3368 { 3369 uint32_t rv; 3370 uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv)); 3371 uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv)); 3372 d[0] = s[3]; 3373 d[1] = s[2]; 3374 d[2] = s[1]; 3375 d[3] = s[0]; 3376 return rv; 3377 } 3378 3379 /* 3380 * swap a quad 3381 */ 3382 private uint64_t 3383 swap8(uint64_t sv) 3384 { 3385 uint64_t rv; 3386 uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv)); 3387 uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv)); 3388 #if 0 3389 d[0] = s[3]; 3390 d[1] = s[2]; 3391 d[2] = s[1]; 3392 d[3] = s[0]; 3393 d[4] = s[7]; 3394 d[5] = s[6]; 3395 d[6] = s[5]; 3396 d[7] = s[4]; 3397 #else 3398 d[0] = s[7]; 3399 d[1] = s[6]; 3400 d[2] = s[5]; 3401 d[3] = s[4]; 3402 d[4] = s[3]; 3403 d[5] = s[2]; 3404 d[6] = s[1]; 3405 d[7] = s[0]; 3406 #endif 3407 return rv; 3408 } 3409 3410 /* 3411 * byteswap a single magic entry 3412 */ 3413 private void 3414 bs1(struct magic *m) 3415 { 3416 m->cont_level = swap2(m->cont_level); 3417 m->offset = swap4(CAST(uint32_t, m->offset)); 3418 m->in_offset = swap4(CAST(uint32_t, m->in_offset)); 3419 m->lineno = swap4(CAST(uint32_t, m->lineno)); 3420 if (IS_STRING(m->type)) { 3421 m->str_range = swap4(m->str_range); 3422 m->str_flags = swap4(m->str_flags); 3423 } 3424 else { 3425 m->value.q = swap8(m->value.q); 3426 m->num_mask = swap8(m->num_mask); 3427 } 3428 } 3429 3430 protected size_t 3431 file_pstring_length_size(struct magic_set *ms, const struct magic *m) 3432 { 3433 switch (m->str_flags & PSTRING_LEN) { 3434 case PSTRING_1_LE: 3435 return 1; 3436 case PSTRING_2_LE: 3437 case PSTRING_2_BE: 3438 return 2; 3439 case PSTRING_4_LE: 3440 case PSTRING_4_BE: 3441 return 4; 3442 default: 3443 file_error(ms, 0, "corrupt magic file " 3444 "(bad pascal string length %d)", 3445 m->str_flags & PSTRING_LEN); 3446 return FILE_BADSIZE; 3447 } 3448 } 3449 protected size_t 3450 file_pstring_get_length(struct magic_set *ms, const struct magic *m, 3451 const char *ss) 3452 { 3453 size_t len = 0; 3454 const unsigned char *s = RCAST(const unsigned char *, ss); 3455 unsigned int s3, s2, s1, s0; 3456 3457 switch (m->str_flags & PSTRING_LEN) { 3458 case PSTRING_1_LE: 3459 len = *s; 3460 break; 3461 case PSTRING_2_LE: 3462 s0 = s[0]; 3463 s1 = s[1]; 3464 len = (s1 << 8) | s0; 3465 break; 3466 case PSTRING_2_BE: 3467 s0 = s[0]; 3468 s1 = s[1]; 3469 len = (s0 << 8) | s1; 3470 break; 3471 case PSTRING_4_LE: 3472 s0 = s[0]; 3473 s1 = s[1]; 3474 s2 = s[2]; 3475 s3 = s[3]; 3476 len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0; 3477 break; 3478 case PSTRING_4_BE: 3479 s0 = s[0]; 3480 s1 = s[1]; 3481 s2 = s[2]; 3482 s3 = s[3]; 3483 len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3; 3484 break; 3485 default: 3486 file_error(ms, 0, "corrupt magic file " 3487 "(bad pascal string length %d)", 3488 m->str_flags & PSTRING_LEN); 3489 return FILE_BADSIZE; 3490 } 3491 3492 if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) { 3493 size_t l = file_pstring_length_size(ms, m); 3494 if (l == FILE_BADSIZE) 3495 return l; 3496 len -= l; 3497 } 3498 3499 return len; 3500 } 3501 3502 protected int 3503 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v) 3504 { 3505 uint32_t i, j; 3506 struct mlist *mlist, *ml; 3507 3508 mlist = ms->mlist[1]; 3509 3510 for (ml = mlist->next; ml != mlist; ml = ml->next) { 3511 struct magic *ma = ml->magic; 3512 uint32_t nma = ml->nmagic; 3513 for (i = 0; i < nma; i++) { 3514 if (ma[i].type != FILE_NAME) 3515 continue; 3516 if (strcmp(ma[i].value.s, name) == 0) { 3517 v->magic = &ma[i]; 3518 for (j = i + 1; j < nma; j++) 3519 if (ma[j].cont_level == 0) 3520 break; 3521 v->nmagic = j - i; 3522 return 0; 3523 } 3524 } 3525 } 3526 return -1; 3527 } 3528