1 /* $NetBSD: file.c,v 1.15 2020/06/15 00:37:24 christos Exp $ */ 2 3 /* 4 * Copyright (c) Ian F. Darwin 1986-1995. 5 * Software written by Ian F. Darwin and others; 6 * maintained 1995-present by Christos Zoulas and others. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice immediately at the beginning of the file, without modification, 13 * this list of conditions, and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 /* 31 * file - find type of a file or files - main program. 32 */ 33 34 #include "file.h" 35 36 #ifndef lint 37 #if 0 38 FILE_RCSID("@(#)$File: file.c,v 1.187 2020/06/07 17:38:30 christos Exp $") 39 #else 40 __RCSID("$NetBSD: file.c,v 1.15 2020/06/15 00:37:24 christos Exp $"); 41 #endif 42 #endif /* lint */ 43 44 #include "magic.h" 45 46 #include <stdlib.h> 47 #include <unistd.h> 48 #include <string.h> 49 #ifdef RESTORE_TIME 50 # if (__COHERENT__ >= 0x420) 51 # include <sys/utime.h> 52 # else 53 # ifdef USE_UTIMES 54 # include <sys/time.h> 55 # else 56 # include <utime.h> 57 # endif 58 # endif 59 #endif 60 #ifdef HAVE_UNISTD_H 61 #include <unistd.h> /* for read() */ 62 #endif 63 #ifdef HAVE_WCHAR_H 64 #include <wchar.h> 65 #endif 66 67 #if defined(HAVE_GETOPT_H) && defined(HAVE_STRUCT_OPTION) 68 # include <getopt.h> 69 # ifndef HAVE_GETOPT_LONG 70 int getopt_long(int, char * const *, const char *, 71 const struct option *, int *); 72 # endif 73 # else 74 # include "mygetopt.h" 75 #endif 76 77 #ifdef S_IFLNK 78 # define IFLNK_h "h" 79 # define IFLNK_L "L" 80 #else 81 # define IFLNK_h "" 82 # define IFLNK_L "" 83 #endif 84 85 #define FILE_FLAGS "bcCdE" IFLNK_h "ik" IFLNK_L "lNnprsSvzZ0" 86 #define OPTSTRING "bcCde:Ef:F:hiklLm:nNpP:rsSvzZ0" 87 88 # define USAGE \ 89 "Usage: %s [-" FILE_FLAGS "] [--apple] [--extension] [--mime-encoding]\n" \ 90 " [--mime-type] [-e <testname>] [-F <separator>] " \ 91 " [-f <namefile>]\n" \ 92 " [-m <magicfiles>] [-P <parameter=value>] [--exclude-quiet]\n" \ 93 " <file> ...\n" \ 94 " %s -C [-m <magicfiles>]\n" \ 95 " %s [--help]\n" 96 97 private int /* Global command-line options */ 98 bflag = 0, /* brief output format */ 99 nopad = 0, /* Don't pad output */ 100 nobuffer = 0, /* Do not buffer stdout */ 101 nulsep = 0; /* Append '\0' to the separator */ 102 103 private const char *separator = ":"; /* Default field separator */ 104 private const struct option long_options[] = { 105 #define OPT_HELP 1 106 #define OPT_APPLE 2 107 #define OPT_EXTENSIONS 3 108 #define OPT_MIME_TYPE 4 109 #define OPT_MIME_ENCODING 5 110 #define OPT_EXCLUDE_QUIET 6 111 #define OPT(shortname, longname, opt, def, doc) \ 112 {longname, opt, NULL, shortname}, 113 #define OPT_LONGONLY(longname, opt, def, doc, id) \ 114 {longname, opt, NULL, id}, 115 #include "file_opts.h" 116 #undef OPT 117 #undef OPT_LONGONLY 118 {0, 0, NULL, 0} 119 }; 120 121 private const struct { 122 const char *name; 123 int value; 124 } nv[] = { 125 { "apptype", MAGIC_NO_CHECK_APPTYPE }, 126 { "ascii", MAGIC_NO_CHECK_ASCII }, 127 { "cdf", MAGIC_NO_CHECK_CDF }, 128 { "compress", MAGIC_NO_CHECK_COMPRESS }, 129 { "csv", MAGIC_NO_CHECK_CSV }, 130 { "elf", MAGIC_NO_CHECK_ELF }, 131 { "encoding", MAGIC_NO_CHECK_ENCODING }, 132 { "soft", MAGIC_NO_CHECK_SOFT }, 133 { "tar", MAGIC_NO_CHECK_TAR }, 134 { "json", MAGIC_NO_CHECK_JSON }, 135 { "text", MAGIC_NO_CHECK_TEXT }, /* synonym for ascii */ 136 { "tokens", MAGIC_NO_CHECK_TOKENS }, /* OBSOLETE: ignored for backwards compatibility */ 137 }; 138 139 private struct { 140 const char *name; 141 int tag; 142 size_t value; 143 int set; 144 size_t def; 145 const char *desc; 146 } pm[] = { 147 { "bytes", MAGIC_PARAM_BYTES_MAX, 0, 0, FILE_BYTES_MAX, 148 "max bytes to look inside file" }, 149 { "elf_notes", MAGIC_PARAM_ELF_NOTES_MAX, 0, 0, FILE_ELF_NOTES_MAX, 150 "max ELF notes processed" }, 151 { "elf_phnum", MAGIC_PARAM_ELF_PHNUM_MAX, 0, 0, FILE_ELF_PHNUM_MAX, 152 "max ELF prog sections processed" }, 153 { "elf_shnum", MAGIC_PARAM_ELF_SHNUM_MAX, 0, 0, FILE_ELF_SHNUM_MAX, 154 "max ELF sections processed" }, 155 { "indir", MAGIC_PARAM_INDIR_MAX, 0, 0, FILE_INDIR_MAX, 156 "recursion limit for indirection" }, 157 { "name", MAGIC_PARAM_NAME_MAX, 0, 0, FILE_NAME_MAX, 158 "use limit for name/use magic" }, 159 { "regex", MAGIC_PARAM_REGEX_MAX, 0, 0, FILE_REGEX_MAX, 160 "length limit for REGEX searches" }, 161 }; 162 163 private int posixly; 164 165 #ifdef __dead 166 __dead 167 #endif 168 private void usage(void); 169 private void docprint(const char *, int); 170 #ifdef __dead 171 __dead 172 #endif 173 private void help(void); 174 175 private int unwrap(struct magic_set *, const char *); 176 private int process(struct magic_set *ms, const char *, int); 177 private struct magic_set *load(const char *, int); 178 private void setparam(const char *); 179 private void applyparam(magic_t); 180 181 182 /* 183 * main - parse arguments and handle options 184 */ 185 int 186 main(int argc, char *argv[]) 187 { 188 int c; 189 size_t i; 190 int action = 0, didsomefiles = 0, errflg = 0; 191 int flags = 0, e = 0; 192 #ifdef HAVE_LIBSECCOMP 193 int sandbox = 1; 194 #endif 195 struct magic_set *magic = NULL; 196 int longindex; 197 const char *magicfile = NULL; /* where the magic is */ 198 char *progname; 199 200 /* makes islower etc work for other langs */ 201 (void)setlocale(LC_CTYPE, ""); 202 203 #ifdef __EMX__ 204 /* sh-like wildcard expansion! Shouldn't hurt at least ... */ 205 _wildcard(&argc, &argv); 206 #endif 207 208 if ((progname = strrchr(argv[0], '/')) != NULL) 209 progname++; 210 else 211 progname = argv[0]; 212 213 file_setprogname(progname); 214 215 216 #ifdef S_IFLNK 217 posixly = getenv("POSIXLY_CORRECT") != NULL; 218 flags |= posixly ? MAGIC_SYMLINK : 0; 219 #endif 220 while ((c = getopt_long(argc, argv, OPTSTRING, long_options, 221 &longindex)) != -1) 222 switch (c) { 223 case OPT_HELP: 224 help(); 225 break; 226 case OPT_APPLE: 227 flags |= MAGIC_APPLE; 228 break; 229 case OPT_EXTENSIONS: 230 flags |= MAGIC_EXTENSION; 231 break; 232 case OPT_MIME_TYPE: 233 flags |= MAGIC_MIME_TYPE; 234 break; 235 case OPT_MIME_ENCODING: 236 flags |= MAGIC_MIME_ENCODING; 237 break; 238 case '0': 239 nulsep++; 240 break; 241 case 'b': 242 bflag++; 243 break; 244 case 'c': 245 action = FILE_CHECK; 246 break; 247 case 'C': 248 action = FILE_COMPILE; 249 break; 250 case 'd': 251 flags |= MAGIC_DEBUG|MAGIC_CHECK; 252 break; 253 case 'E': 254 flags |= MAGIC_ERROR; 255 break; 256 case 'e': 257 case OPT_EXCLUDE_QUIET: 258 for (i = 0; i < __arraycount(nv); i++) 259 if (strcmp(nv[i].name, optarg) == 0) 260 break; 261 262 if (i == __arraycount(nv)) { 263 if (c != OPT_EXCLUDE_QUIET) 264 errflg++; 265 } else 266 flags |= nv[i].value; 267 break; 268 269 case 'f': 270 if(action) 271 usage(); 272 if (magic == NULL) 273 if ((magic = load(magicfile, flags)) == NULL) 274 return 1; 275 applyparam(magic); 276 e |= unwrap(magic, optarg); 277 ++didsomefiles; 278 break; 279 case 'F': 280 separator = optarg; 281 break; 282 case 'i': 283 flags |= MAGIC_MIME; 284 break; 285 case 'k': 286 flags |= MAGIC_CONTINUE; 287 break; 288 case 'l': 289 action = FILE_LIST; 290 break; 291 case 'm': 292 magicfile = optarg; 293 break; 294 case 'n': 295 ++nobuffer; 296 break; 297 case 'N': 298 ++nopad; 299 break; 300 #if defined(HAVE_UTIME) || defined(HAVE_UTIMES) 301 case 'p': 302 flags |= MAGIC_PRESERVE_ATIME; 303 break; 304 #endif 305 case 'P': 306 setparam(optarg); 307 break; 308 case 'r': 309 flags |= MAGIC_RAW; 310 break; 311 case 's': 312 flags |= MAGIC_DEVICES; 313 break; 314 case 'S': 315 #ifdef HAVE_LIBSECCOMP 316 sandbox = 0; 317 #endif 318 break; 319 case 'v': 320 if (magicfile == NULL) 321 magicfile = magic_getpath(magicfile, action); 322 (void)fprintf(stdout, "%s-%s\n", file_getprogname(), 323 VERSION); 324 (void)fprintf(stdout, "magic file from %s\n", 325 magicfile); 326 #ifdef HAVE_LIBSECCOMP 327 (void)fprintf(stdout, "seccomp support included\n"); 328 #endif 329 return 0; 330 case 'z': 331 flags |= MAGIC_COMPRESS; 332 break; 333 334 case 'Z': 335 flags |= MAGIC_COMPRESS|MAGIC_COMPRESS_TRANSP; 336 break; 337 #ifdef S_IFLNK 338 case 'L': 339 flags |= MAGIC_SYMLINK; 340 break; 341 case 'h': 342 flags &= ~MAGIC_SYMLINK; 343 break; 344 #endif 345 case '?': 346 default: 347 errflg++; 348 break; 349 } 350 351 if (errflg) { 352 usage(); 353 } 354 if (e) 355 return e; 356 357 #ifdef HAVE_LIBSECCOMP 358 #if 0 359 if (sandbox && enable_sandbox_basic() == -1) 360 #else 361 if (sandbox && enable_sandbox_full() == -1) 362 #endif 363 file_err(EXIT_FAILURE, "SECCOMP initialisation failed"); 364 #endif /* HAVE_LIBSECCOMP */ 365 366 if (MAGIC_VERSION != magic_version()) 367 file_warnx("Compiled magic version [%d] " 368 "does not match with shared library magic version [%d]\n", 369 MAGIC_VERSION, magic_version()); 370 371 switch(action) { 372 case FILE_CHECK: 373 case FILE_COMPILE: 374 case FILE_LIST: 375 /* 376 * Don't try to check/compile ~/.magic unless we explicitly 377 * ask for it. 378 */ 379 magic = magic_open(flags|MAGIC_CHECK); 380 if (magic == NULL) { 381 file_warn("Can't create magic"); 382 return 1; 383 } 384 385 386 switch(action) { 387 case FILE_CHECK: 388 c = magic_check(magic, magicfile); 389 break; 390 case FILE_COMPILE: 391 c = magic_compile(magic, magicfile); 392 break; 393 case FILE_LIST: 394 c = magic_list(magic, magicfile); 395 break; 396 default: 397 abort(); 398 } 399 if (c == -1) { 400 file_warnx("%s", magic_error(magic)); 401 e = 1; 402 goto out; 403 } 404 goto out; 405 default: 406 if (magic == NULL) 407 if ((magic = load(magicfile, flags)) == NULL) 408 return 1; 409 applyparam(magic); 410 } 411 412 if (optind == argc) { 413 if (!didsomefiles) 414 usage(); 415 } 416 else { 417 size_t j, wid, nw; 418 for (wid = 0, j = CAST(size_t, optind); j < CAST(size_t, argc); 419 j++) { 420 nw = file_mbswidth(argv[j]); 421 if (nw > wid) 422 wid = nw; 423 } 424 /* 425 * If bflag is only set twice, set it depending on 426 * number of files [this is undocumented, and subject to change] 427 */ 428 if (bflag == 2) { 429 bflag = optind >= argc - 1; 430 } 431 for (; optind < argc; optind++) 432 e |= process(magic, argv[optind], wid); 433 } 434 435 out: 436 if (magic) 437 magic_close(magic); 438 return e; 439 } 440 441 private void 442 applyparam(magic_t magic) 443 { 444 size_t i; 445 446 for (i = 0; i < __arraycount(pm); i++) { 447 if (!pm[i].set) 448 continue; 449 if (magic_setparam(magic, pm[i].tag, &pm[i].value) == -1) 450 file_err(EXIT_FAILURE, "Can't set %s", pm[i].name); 451 } 452 } 453 454 private void 455 setparam(const char *p) 456 { 457 size_t i; 458 char *s; 459 460 if ((s = strchr(p, '=')) == NULL) 461 goto badparm; 462 463 for (i = 0; i < __arraycount(pm); i++) { 464 if (strncmp(p, pm[i].name, s - p) != 0) 465 continue; 466 pm[i].value = atoi(s + 1); 467 pm[i].set = 1; 468 return; 469 } 470 badparm: 471 file_errx(EXIT_FAILURE, "Unknown param %s", p); 472 } 473 474 private struct magic_set * 475 /*ARGSUSED*/ 476 load(const char *magicfile, int flags) 477 { 478 struct magic_set *magic = magic_open(flags); 479 const char *e; 480 481 if (magic == NULL) { 482 file_warn("Can't create magic"); 483 return NULL; 484 } 485 if (magic_load(magic, magicfile) == -1) { 486 file_warn("%s", magic_error(magic)); 487 magic_close(magic); 488 return NULL; 489 } 490 if ((e = magic_error(magic)) != NULL) 491 file_warn("%s", e); 492 return magic; 493 } 494 495 /* 496 * unwrap -- read a file of filenames, do each one. 497 */ 498 private int 499 unwrap(struct magic_set *ms, const char *fn) 500 { 501 FILE *f; 502 ssize_t len; 503 char *line = NULL; 504 size_t llen = 0; 505 int wid = 0, cwid; 506 int e = 0; 507 508 if (strcmp("-", fn) == 0) { 509 f = stdin; 510 wid = 1; 511 } else { 512 if ((f = fopen(fn, "r")) == NULL) { 513 file_warn("Cannot open `%s'", fn); 514 return 1; 515 } 516 517 while ((len = getline(&line, &llen, f)) > 0) { 518 if (line[len - 1] == '\n') 519 line[len - 1] = '\0'; 520 cwid = file_mbswidth(line); 521 if (cwid > wid) 522 wid = cwid; 523 } 524 525 rewind(f); 526 } 527 528 while ((len = getline(&line, &llen, f)) > 0) { 529 if (line[len - 1] == '\n') 530 line[len - 1] = '\0'; 531 e |= process(ms, line, wid); 532 if(nobuffer) 533 (void)fflush(stdout); 534 } 535 536 free(line); 537 (void)fclose(f); 538 return e; 539 } 540 541 /* 542 * Called for each input file on the command line (or in a list of files) 543 */ 544 private int 545 process(struct magic_set *ms, const char *inname, int wid) 546 { 547 const char *type, c = nulsep > 1 ? '\0' : '\n'; 548 int std_in = strcmp(inname, "-") == 0; 549 550 if (wid > 0 && !bflag) { 551 (void)printf("%s", std_in ? "/dev/stdin" : inname); 552 if (nulsep) 553 (void)putc('\0', stdout); 554 if (nulsep < 2) { 555 (void)printf("%s", separator); 556 (void)printf("%*s ", CAST(int, nopad ? 0 557 : (wid - file_mbswidth(inname))), ""); 558 } 559 } 560 561 type = magic_file(ms, std_in ? NULL : inname); 562 563 if (type == NULL) { 564 (void)printf("ERROR: %s%c", magic_error(ms), c); 565 return 1; 566 } else { 567 (void)printf("%s%c", type, c); 568 return 0; 569 } 570 } 571 572 protected size_t 573 file_mbswidth(const char *s) 574 { 575 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 576 size_t bytesconsumed, old_n, n, width = 0; 577 mbstate_t state; 578 wchar_t nextchar; 579 (void)memset(&state, 0, sizeof(mbstate_t)); 580 old_n = n = strlen(s); 581 582 while (n > 0) { 583 bytesconsumed = mbrtowc(&nextchar, s, n, &state); 584 if (bytesconsumed == CAST(size_t, -1) || 585 bytesconsumed == CAST(size_t, -2)) { 586 /* Something went wrong, return something reasonable */ 587 return old_n; 588 } 589 if (s[0] == '\n') { 590 /* 591 * do what strlen() would do, so that caller 592 * is always right 593 */ 594 width++; 595 } else { 596 int w = wcwidth(nextchar); 597 if (w > 0) 598 width += w; 599 } 600 601 s += bytesconsumed, n -= bytesconsumed; 602 } 603 return width; 604 #else 605 return strlen(s); 606 #endif 607 } 608 609 private void 610 usage(void) 611 { 612 const char *pn = file_getprogname(); 613 (void)fprintf(stderr, USAGE, pn, pn, pn); 614 exit(EXIT_FAILURE); 615 } 616 617 private void 618 defprint(int def) 619 { 620 if (!def) 621 return; 622 if (((def & 1) && posixly) || ((def & 2) && !posixly)) 623 fprintf(stdout, " (default)"); 624 fputc('\n', stdout); 625 } 626 627 private void 628 docprint(const char *opts, int def) 629 { 630 size_t i; 631 int comma, pad; 632 char *sp, *p; 633 634 p = strchr(opts, '%'); 635 if (p == NULL) { 636 fprintf(stdout, "%s", opts); 637 defprint(def); 638 return; 639 } 640 641 for (sp = p - 1; sp > opts && *sp == ' '; sp--) 642 continue; 643 644 fprintf(stdout, "%.*s", CAST(int, p - opts), opts); 645 pad = (int)CAST(int, p - sp - 1); 646 647 switch (*++p) { 648 case 'e': 649 comma = 0; 650 for (i = 0; i < __arraycount(nv); i++) { 651 fprintf(stdout, "%s%s", comma++ ? ", " : "", nv[i].name); 652 if (i && i % 5 == 0 && i != __arraycount(nv) - 1) { 653 fprintf(stdout, ",\n%*s", pad, ""); 654 comma = 0; 655 } 656 } 657 break; 658 case 'P': 659 for (i = 0; i < __arraycount(pm); i++) { 660 fprintf(stdout, "%9s %7zu %s", pm[i].name, pm[i].def, 661 pm[i].desc); 662 if (i != __arraycount(pm) - 1) 663 fprintf(stdout, "\n%*s", pad, ""); 664 } 665 break; 666 default: 667 file_errx(EXIT_FAILURE, "Unknown escape `%c' in long options", 668 *p); 669 break; 670 } 671 fprintf(stdout, "%s", opts + (p - opts) + 1); 672 673 } 674 675 private void 676 help(void) 677 { 678 (void)fputs( 679 "Usage: file [OPTION...] [FILE...]\n" 680 "Determine type of FILEs.\n" 681 "\n", stdout); 682 #define OPT(shortname, longname, opt, def, doc) \ 683 fprintf(stdout, " -%c, --" longname, shortname), \ 684 docprint(doc, def); 685 #define OPT_LONGONLY(longname, opt, def, doc, id) \ 686 fprintf(stdout, " --" longname), \ 687 docprint(doc, def); 688 #include "file_opts.h" 689 #undef OPT 690 #undef OPT_LONGONLY 691 fprintf(stdout, "\nReport bugs to https://bugs.astron.com/\n"); 692 exit(EXIT_SUCCESS); 693 } 694 695 private const char *file_progname; 696 697 protected void 698 file_setprogname(const char *progname) 699 { 700 file_progname = progname; 701 } 702 703 protected const char * 704 file_getprogname(void) 705 { 706 return file_progname; 707 } 708 709 protected void 710 file_err(int e, const char *fmt, ...) 711 { 712 va_list ap; 713 int se = errno; 714 715 va_start(ap, fmt); 716 fprintf(stderr, "%s: ", file_progname); 717 vfprintf(stderr, fmt, ap); 718 va_end(ap); 719 if (se) 720 fprintf(stderr, " (%s)\n", strerror(se)); 721 else 722 fputc('\n', stderr); 723 exit(e); 724 } 725 726 protected void 727 file_errx(int e, const char *fmt, ...) 728 { 729 va_list ap; 730 731 va_start(ap, fmt); 732 fprintf(stderr, "%s: ", file_progname); 733 vfprintf(stderr, fmt, ap); 734 va_end(ap); 735 fprintf(stderr, "\n"); 736 exit(e); 737 } 738 739 protected void 740 file_warn(const char *fmt, ...) 741 { 742 va_list ap; 743 int se = errno; 744 745 va_start(ap, fmt); 746 fprintf(stderr, "%s: ", file_progname); 747 vfprintf(stderr, fmt, ap); 748 va_end(ap); 749 if (se) 750 fprintf(stderr, " (%s)\n", strerror(se)); 751 else 752 fputc('\n', stderr); 753 errno = se; 754 } 755 756 protected void 757 file_warnx(const char *fmt, ...) 758 { 759 va_list ap; 760 int se = errno; 761 762 va_start(ap, fmt); 763 fprintf(stderr, "%s: ", file_progname); 764 vfprintf(stderr, fmt, ap); 765 va_end(ap); 766 fprintf(stderr, "\n"); 767 errno = se; 768 } 769