1 /* $NetBSD: file.c,v 1.16 2021/04/09 19:11:42 christos Exp $ */ 2 3 /* 4 * Copyright (c) Ian F. Darwin 1986-1995. 5 * Software written by Ian F. Darwin and others; 6 * maintained 1995-present by Christos Zoulas and others. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice immediately at the beginning of the file, without modification, 13 * this list of conditions, and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 /* 31 * file - find type of a file or files - main program. 32 */ 33 34 #include "file.h" 35 36 #ifndef lint 37 #if 0 38 FILE_RCSID("@(#)$File: file.c,v 1.189 2021/02/05 21:33:49 christos Exp $") 39 #else 40 __RCSID("$NetBSD: file.c,v 1.16 2021/04/09 19:11:42 christos Exp $"); 41 #endif 42 #endif /* lint */ 43 44 #include "magic.h" 45 46 #include <stdlib.h> 47 #include <unistd.h> 48 #include <string.h> 49 #ifdef RESTORE_TIME 50 # if (__COHERENT__ >= 0x420) 51 # include <sys/utime.h> 52 # else 53 # ifdef USE_UTIMES 54 # include <sys/time.h> 55 # else 56 # include <utime.h> 57 # endif 58 # endif 59 #endif 60 #ifdef HAVE_UNISTD_H 61 #include <unistd.h> /* for read() */ 62 #endif 63 #ifdef HAVE_WCHAR_H 64 #include <wchar.h> 65 #endif 66 67 #if defined(HAVE_GETOPT_H) && defined(HAVE_STRUCT_OPTION) 68 # include <getopt.h> 69 # ifndef HAVE_GETOPT_LONG 70 int getopt_long(int, char * const *, const char *, 71 const struct option *, int *); 72 # endif 73 # else 74 # include "mygetopt.h" 75 #endif 76 77 #ifdef S_IFLNK 78 # define IFLNK_h "h" 79 # define IFLNK_L "L" 80 #else 81 # define IFLNK_h "" 82 # define IFLNK_L "" 83 #endif 84 85 #define FILE_FLAGS "bcCdE" IFLNK_h "ik" IFLNK_L "lNnprsSvzZ0" 86 #define OPTSTRING "bcCde:Ef:F:hiklLm:nNpP:rsSvzZ0" 87 88 # define USAGE \ 89 "Usage: %s [-" FILE_FLAGS "] [--apple] [--extension] [--mime-encoding]\n" \ 90 " [--mime-type] [-e <testname>] [-F <separator>] " \ 91 " [-f <namefile>]\n" \ 92 " [-m <magicfiles>] [-P <parameter=value>] [--exclude-quiet]\n" \ 93 " <file> ...\n" \ 94 " %s -C [-m <magicfiles>]\n" \ 95 " %s [--help]\n" 96 97 private int /* Global command-line options */ 98 bflag = 0, /* brief output format */ 99 nopad = 0, /* Don't pad output */ 100 nobuffer = 0, /* Do not buffer stdout */ 101 nulsep = 0; /* Append '\0' to the separator */ 102 103 private const char *separator = ":"; /* Default field separator */ 104 private const struct option long_options[] = { 105 #define OPT_HELP 1 106 #define OPT_APPLE 2 107 #define OPT_EXTENSIONS 3 108 #define OPT_MIME_TYPE 4 109 #define OPT_MIME_ENCODING 5 110 #define OPT_EXCLUDE_QUIET 6 111 #define OPT(shortname, longname, opt, def, doc) \ 112 {longname, opt, NULL, shortname}, 113 #define OPT_LONGONLY(longname, opt, def, doc, id) \ 114 {longname, opt, NULL, id}, 115 #include "file_opts.h" 116 #undef OPT 117 #undef OPT_LONGONLY 118 {0, 0, NULL, 0} 119 }; 120 121 private const struct { 122 const char *name; 123 int value; 124 } nv[] = { 125 { "apptype", MAGIC_NO_CHECK_APPTYPE }, 126 { "ascii", MAGIC_NO_CHECK_ASCII }, 127 { "cdf", MAGIC_NO_CHECK_CDF }, 128 { "compress", MAGIC_NO_CHECK_COMPRESS }, 129 { "csv", MAGIC_NO_CHECK_CSV }, 130 { "elf", MAGIC_NO_CHECK_ELF }, 131 { "encoding", MAGIC_NO_CHECK_ENCODING }, 132 { "soft", MAGIC_NO_CHECK_SOFT }, 133 { "tar", MAGIC_NO_CHECK_TAR }, 134 { "json", MAGIC_NO_CHECK_JSON }, 135 { "text", MAGIC_NO_CHECK_TEXT }, /* synonym for ascii */ 136 { "tokens", MAGIC_NO_CHECK_TOKENS }, /* OBSOLETE: ignored for backwards compatibility */ 137 }; 138 139 private struct { 140 const char *name; 141 int tag; 142 size_t value; 143 int set; 144 size_t def; 145 const char *desc; 146 } pm[] = { 147 { "bytes", MAGIC_PARAM_BYTES_MAX, 0, 0, FILE_BYTES_MAX, 148 "max bytes to look inside file" }, 149 { "elf_notes", MAGIC_PARAM_ELF_NOTES_MAX, 0, 0, FILE_ELF_NOTES_MAX, 150 "max ELF notes processed" }, 151 { "elf_phnum", MAGIC_PARAM_ELF_PHNUM_MAX, 0, 0, FILE_ELF_PHNUM_MAX, 152 "max ELF prog sections processed" }, 153 { "elf_shnum", MAGIC_PARAM_ELF_SHNUM_MAX, 0, 0, FILE_ELF_SHNUM_MAX, 154 "max ELF sections processed" }, 155 { "encoding", MAGIC_PARAM_ENCODING_MAX, 0, 0, FILE_ENCODING_MAX, 156 "max bytes to scan for encoding" }, 157 { "indir", MAGIC_PARAM_INDIR_MAX, 0, 0, FILE_INDIR_MAX, 158 "recursion limit for indirection" }, 159 { "name", MAGIC_PARAM_NAME_MAX, 0, 0, FILE_NAME_MAX, 160 "use limit for name/use magic" }, 161 { "regex", MAGIC_PARAM_REGEX_MAX, 0, 0, FILE_REGEX_MAX, 162 "length limit for REGEX searches" }, 163 }; 164 165 private int posixly; 166 167 #ifdef __dead 168 __dead 169 #endif 170 private void usage(void); 171 private void docprint(const char *, int); 172 #ifdef __dead 173 __dead 174 #endif 175 private void help(void); 176 177 private int unwrap(struct magic_set *, const char *); 178 private int process(struct magic_set *ms, const char *, int); 179 private struct magic_set *load(const char *, int); 180 private void setparam(const char *); 181 private void applyparam(magic_t); 182 183 184 /* 185 * main - parse arguments and handle options 186 */ 187 int 188 main(int argc, char *argv[]) 189 { 190 int c; 191 size_t i; 192 int action = 0, didsomefiles = 0, errflg = 0; 193 int flags = 0, e = 0; 194 #ifdef HAVE_LIBSECCOMP 195 int sandbox = 1; 196 #endif 197 struct magic_set *magic = NULL; 198 int longindex; 199 const char *magicfile = NULL; /* where the magic is */ 200 char *progname; 201 202 /* makes islower etc work for other langs */ 203 (void)setlocale(LC_CTYPE, ""); 204 205 #ifdef __EMX__ 206 /* sh-like wildcard expansion! Shouldn't hurt at least ... */ 207 _wildcard(&argc, &argv); 208 #endif 209 210 if ((progname = strrchr(argv[0], '/')) != NULL) 211 progname++; 212 else 213 progname = argv[0]; 214 215 file_setprogname(progname); 216 217 218 #ifdef S_IFLNK 219 posixly = getenv("POSIXLY_CORRECT") != NULL; 220 flags |= posixly ? MAGIC_SYMLINK : 0; 221 #endif 222 while ((c = getopt_long(argc, argv, OPTSTRING, long_options, 223 &longindex)) != -1) 224 switch (c) { 225 case OPT_HELP: 226 help(); 227 break; 228 case OPT_APPLE: 229 flags |= MAGIC_APPLE; 230 break; 231 case OPT_EXTENSIONS: 232 flags |= MAGIC_EXTENSION; 233 break; 234 case OPT_MIME_TYPE: 235 flags |= MAGIC_MIME_TYPE; 236 break; 237 case OPT_MIME_ENCODING: 238 flags |= MAGIC_MIME_ENCODING; 239 break; 240 case '0': 241 nulsep++; 242 break; 243 case 'b': 244 bflag++; 245 break; 246 case 'c': 247 action = FILE_CHECK; 248 break; 249 case 'C': 250 action = FILE_COMPILE; 251 break; 252 case 'd': 253 flags |= MAGIC_DEBUG|MAGIC_CHECK; 254 break; 255 case 'E': 256 flags |= MAGIC_ERROR; 257 break; 258 case 'e': 259 case OPT_EXCLUDE_QUIET: 260 for (i = 0; i < __arraycount(nv); i++) 261 if (strcmp(nv[i].name, optarg) == 0) 262 break; 263 264 if (i == __arraycount(nv)) { 265 if (c != OPT_EXCLUDE_QUIET) 266 errflg++; 267 } else 268 flags |= nv[i].value; 269 break; 270 271 case 'f': 272 if(action) 273 usage(); 274 if (magic == NULL) 275 if ((magic = load(magicfile, flags)) == NULL) 276 return 1; 277 applyparam(magic); 278 e |= unwrap(magic, optarg); 279 ++didsomefiles; 280 break; 281 case 'F': 282 separator = optarg; 283 break; 284 case 'i': 285 flags |= MAGIC_MIME; 286 break; 287 case 'k': 288 flags |= MAGIC_CONTINUE; 289 break; 290 case 'l': 291 action = FILE_LIST; 292 break; 293 case 'm': 294 magicfile = optarg; 295 break; 296 case 'n': 297 ++nobuffer; 298 break; 299 case 'N': 300 ++nopad; 301 break; 302 #if defined(HAVE_UTIME) || defined(HAVE_UTIMES) 303 case 'p': 304 flags |= MAGIC_PRESERVE_ATIME; 305 break; 306 #endif 307 case 'P': 308 setparam(optarg); 309 break; 310 case 'r': 311 flags |= MAGIC_RAW; 312 break; 313 case 's': 314 flags |= MAGIC_DEVICES; 315 break; 316 case 'S': 317 #ifdef HAVE_LIBSECCOMP 318 sandbox = 0; 319 #endif 320 break; 321 case 'v': 322 if (magicfile == NULL) 323 magicfile = magic_getpath(magicfile, action); 324 (void)fprintf(stdout, "%s-%s\n", file_getprogname(), 325 VERSION); 326 (void)fprintf(stdout, "magic file from %s\n", 327 magicfile); 328 #ifdef HAVE_LIBSECCOMP 329 (void)fprintf(stdout, "seccomp support included\n"); 330 #endif 331 return 0; 332 case 'z': 333 flags |= MAGIC_COMPRESS; 334 break; 335 336 case 'Z': 337 flags |= MAGIC_COMPRESS|MAGIC_COMPRESS_TRANSP; 338 break; 339 #ifdef S_IFLNK 340 case 'L': 341 flags |= MAGIC_SYMLINK; 342 break; 343 case 'h': 344 flags &= ~MAGIC_SYMLINK; 345 break; 346 #endif 347 case '?': 348 default: 349 errflg++; 350 break; 351 } 352 353 if (errflg) { 354 usage(); 355 } 356 if (e) 357 return e; 358 359 #ifdef HAVE_LIBSECCOMP 360 #if 0 361 if (sandbox && enable_sandbox_basic() == -1) 362 #else 363 if (sandbox && enable_sandbox_full() == -1) 364 #endif 365 file_err(EXIT_FAILURE, "SECCOMP initialisation failed"); 366 #endif /* HAVE_LIBSECCOMP */ 367 368 if (MAGIC_VERSION != magic_version()) 369 file_warnx("Compiled magic version [%d] " 370 "does not match with shared library magic version [%d]\n", 371 MAGIC_VERSION, magic_version()); 372 373 switch(action) { 374 case FILE_CHECK: 375 case FILE_COMPILE: 376 case FILE_LIST: 377 /* 378 * Don't try to check/compile ~/.magic unless we explicitly 379 * ask for it. 380 */ 381 magic = magic_open(flags|MAGIC_CHECK); 382 if (magic == NULL) { 383 file_warn("Can't create magic"); 384 return 1; 385 } 386 387 388 switch(action) { 389 case FILE_CHECK: 390 c = magic_check(magic, magicfile); 391 break; 392 case FILE_COMPILE: 393 c = magic_compile(magic, magicfile); 394 break; 395 case FILE_LIST: 396 c = magic_list(magic, magicfile); 397 break; 398 default: 399 abort(); 400 } 401 if (c == -1) { 402 file_warnx("%s", magic_error(magic)); 403 e = 1; 404 goto out; 405 } 406 goto out; 407 default: 408 if (magic == NULL) 409 if ((magic = load(magicfile, flags)) == NULL) 410 return 1; 411 applyparam(magic); 412 } 413 414 if (optind == argc) { 415 if (!didsomefiles) 416 usage(); 417 } 418 else { 419 size_t j, wid, nw; 420 for (wid = 0, j = CAST(size_t, optind); j < CAST(size_t, argc); 421 j++) { 422 nw = file_mbswidth(argv[j]); 423 if (nw > wid) 424 wid = nw; 425 } 426 /* 427 * If bflag is only set twice, set it depending on 428 * number of files [this is undocumented, and subject to change] 429 */ 430 if (bflag == 2) { 431 bflag = optind >= argc - 1; 432 } 433 for (; optind < argc; optind++) 434 e |= process(magic, argv[optind], wid); 435 } 436 437 out: 438 if (magic) 439 magic_close(magic); 440 return e; 441 } 442 443 private void 444 applyparam(magic_t magic) 445 { 446 size_t i; 447 448 for (i = 0; i < __arraycount(pm); i++) { 449 if (!pm[i].set) 450 continue; 451 if (magic_setparam(magic, pm[i].tag, &pm[i].value) == -1) 452 file_err(EXIT_FAILURE, "Can't set %s", pm[i].name); 453 } 454 } 455 456 private void 457 setparam(const char *p) 458 { 459 size_t i; 460 char *s; 461 462 if ((s = strchr(p, '=')) == NULL) 463 goto badparm; 464 465 for (i = 0; i < __arraycount(pm); i++) { 466 if (strncmp(p, pm[i].name, s - p) != 0) 467 continue; 468 pm[i].value = atoi(s + 1); 469 pm[i].set = 1; 470 return; 471 } 472 badparm: 473 file_errx(EXIT_FAILURE, "Unknown param %s", p); 474 } 475 476 private struct magic_set * 477 /*ARGSUSED*/ 478 load(const char *magicfile, int flags) 479 { 480 struct magic_set *magic = magic_open(flags); 481 const char *e; 482 483 if (magic == NULL) { 484 file_warn("Can't create magic"); 485 return NULL; 486 } 487 if (magic_load(magic, magicfile) == -1) { 488 file_warn("%s", magic_error(magic)); 489 magic_close(magic); 490 return NULL; 491 } 492 if ((e = magic_error(magic)) != NULL) 493 file_warn("%s", e); 494 return magic; 495 } 496 497 /* 498 * unwrap -- read a file of filenames, do each one. 499 */ 500 private int 501 unwrap(struct magic_set *ms, const char *fn) 502 { 503 FILE *f; 504 ssize_t len; 505 char *line = NULL; 506 size_t llen = 0; 507 int wid = 0, cwid; 508 int e = 0; 509 510 if (strcmp("-", fn) == 0) { 511 f = stdin; 512 wid = 1; 513 } else { 514 if ((f = fopen(fn, "r")) == NULL) { 515 file_warn("Cannot open `%s'", fn); 516 return 1; 517 } 518 519 while ((len = getline(&line, &llen, f)) > 0) { 520 if (line[len - 1] == '\n') 521 line[len - 1] = '\0'; 522 cwid = file_mbswidth(line); 523 if (cwid > wid) 524 wid = cwid; 525 } 526 527 rewind(f); 528 } 529 530 while ((len = getline(&line, &llen, f)) > 0) { 531 if (line[len - 1] == '\n') 532 line[len - 1] = '\0'; 533 e |= process(ms, line, wid); 534 } 535 536 free(line); 537 (void)fclose(f); 538 return e; 539 } 540 541 /* 542 * Called for each input file on the command line (or in a list of files) 543 */ 544 private int 545 process(struct magic_set *ms, const char *inname, int wid) 546 { 547 const char *type, c = nulsep > 1 ? '\0' : '\n'; 548 int std_in = strcmp(inname, "-") == 0; 549 550 if (wid > 0 && !bflag) { 551 (void)printf("%s", std_in ? "/dev/stdin" : inname); 552 if (nulsep) 553 (void)putc('\0', stdout); 554 if (nulsep < 2) { 555 (void)printf("%s", separator); 556 (void)printf("%*s ", CAST(int, nopad ? 0 557 : (wid - file_mbswidth(inname))), ""); 558 } 559 } 560 561 type = magic_file(ms, std_in ? NULL : inname); 562 563 if (type == NULL) { 564 (void)printf("ERROR: %s%c", magic_error(ms), c); 565 } else { 566 (void)printf("%s%c", type, c); 567 } 568 if (nobuffer) 569 (void)fflush(stdout); 570 return type == NULL; 571 } 572 573 protected size_t 574 file_mbswidth(const char *s) 575 { 576 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 577 size_t bytesconsumed, old_n, n, width = 0; 578 mbstate_t state; 579 wchar_t nextchar; 580 (void)memset(&state, 0, sizeof(mbstate_t)); 581 old_n = n = strlen(s); 582 583 while (n > 0) { 584 bytesconsumed = mbrtowc(&nextchar, s, n, &state); 585 if (bytesconsumed == CAST(size_t, -1) || 586 bytesconsumed == CAST(size_t, -2)) { 587 /* Something went wrong, return something reasonable */ 588 return old_n; 589 } 590 if (s[0] == '\n') { 591 /* 592 * do what strlen() would do, so that caller 593 * is always right 594 */ 595 width++; 596 } else { 597 int w = wcwidth(nextchar); 598 if (w > 0) 599 width += w; 600 } 601 602 s += bytesconsumed, n -= bytesconsumed; 603 } 604 return width; 605 #else 606 return strlen(s); 607 #endif 608 } 609 610 private void 611 usage(void) 612 { 613 const char *pn = file_getprogname(); 614 (void)fprintf(stderr, USAGE, pn, pn, pn); 615 exit(EXIT_FAILURE); 616 } 617 618 private void 619 defprint(int def) 620 { 621 if (!def) 622 return; 623 if (((def & 1) && posixly) || ((def & 2) && !posixly)) 624 fprintf(stdout, " (default)"); 625 fputc('\n', stdout); 626 } 627 628 private void 629 docprint(const char *opts, int def) 630 { 631 size_t i; 632 int comma, pad; 633 char *sp, *p; 634 635 p = strchr(opts, '%'); 636 if (p == NULL) { 637 fprintf(stdout, "%s", opts); 638 defprint(def); 639 return; 640 } 641 642 for (sp = p - 1; sp > opts && *sp == ' '; sp--) 643 continue; 644 645 fprintf(stdout, "%.*s", CAST(int, p - opts), opts); 646 pad = (int)CAST(int, p - sp - 1); 647 648 switch (*++p) { 649 case 'e': 650 comma = 0; 651 for (i = 0; i < __arraycount(nv); i++) { 652 fprintf(stdout, "%s%s", comma++ ? ", " : "", nv[i].name); 653 if (i && i % 5 == 0 && i != __arraycount(nv) - 1) { 654 fprintf(stdout, ",\n%*s", pad, ""); 655 comma = 0; 656 } 657 } 658 break; 659 case 'P': 660 for (i = 0; i < __arraycount(pm); i++) { 661 fprintf(stdout, "%9s %7zu %s", pm[i].name, pm[i].def, 662 pm[i].desc); 663 if (i != __arraycount(pm) - 1) 664 fprintf(stdout, "\n%*s", pad, ""); 665 } 666 break; 667 default: 668 file_errx(EXIT_FAILURE, "Unknown escape `%c' in long options", 669 *p); 670 break; 671 } 672 fprintf(stdout, "%s", opts + (p - opts) + 1); 673 674 } 675 676 private void 677 help(void) 678 { 679 (void)fputs( 680 "Usage: file [OPTION...] [FILE...]\n" 681 "Determine type of FILEs.\n" 682 "\n", stdout); 683 #define OPT(shortname, longname, opt, def, doc) \ 684 fprintf(stdout, " -%c, --" longname, shortname), \ 685 docprint(doc, def); 686 #define OPT_LONGONLY(longname, opt, def, doc, id) \ 687 fprintf(stdout, " --" longname), \ 688 docprint(doc, def); 689 #include "file_opts.h" 690 #undef OPT 691 #undef OPT_LONGONLY 692 fprintf(stdout, "\nReport bugs to https://bugs.astron.com/\n"); 693 exit(EXIT_SUCCESS); 694 } 695 696 private const char *file_progname; 697 698 protected void 699 file_setprogname(const char *progname) 700 { 701 file_progname = progname; 702 } 703 704 protected const char * 705 file_getprogname(void) 706 { 707 return file_progname; 708 } 709 710 protected void 711 file_err(int e, const char *fmt, ...) 712 { 713 va_list ap; 714 int se = errno; 715 716 va_start(ap, fmt); 717 fprintf(stderr, "%s: ", file_progname); 718 vfprintf(stderr, fmt, ap); 719 va_end(ap); 720 if (se) 721 fprintf(stderr, " (%s)\n", strerror(se)); 722 else 723 fputc('\n', stderr); 724 exit(e); 725 } 726 727 protected void 728 file_errx(int e, const char *fmt, ...) 729 { 730 va_list ap; 731 732 va_start(ap, fmt); 733 fprintf(stderr, "%s: ", file_progname); 734 vfprintf(stderr, fmt, ap); 735 va_end(ap); 736 fprintf(stderr, "\n"); 737 exit(e); 738 } 739 740 protected void 741 file_warn(const char *fmt, ...) 742 { 743 va_list ap; 744 int se = errno; 745 746 va_start(ap, fmt); 747 fprintf(stderr, "%s: ", file_progname); 748 vfprintf(stderr, fmt, ap); 749 va_end(ap); 750 if (se) 751 fprintf(stderr, " (%s)\n", strerror(se)); 752 else 753 fputc('\n', stderr); 754 errno = se; 755 } 756 757 protected void 758 file_warnx(const char *fmt, ...) 759 { 760 va_list ap; 761 int se = errno; 762 763 va_start(ap, fmt); 764 fprintf(stderr, "%s: ", file_progname); 765 vfprintf(stderr, fmt, ap); 766 va_end(ap); 767 fprintf(stderr, "\n"); 768 errno = se; 769 } 770