xref: /openbsd-src/usr.bin/file/file.c (revision d874cce4b1d9fe6b41c9e4f2117a77d8a4a37b92)
1 /*	$OpenBSD: file.c,v 1.18 2008/05/08 01:40:56 chl Exp $ */
2 /*
3  * Copyright (c) Ian F. Darwin 1986-1995.
4  * Software written by Ian F. Darwin and others;
5  * maintained 1995-present by Christos Zoulas and others.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice immediately at the beginning of the file, without modification,
12  *    this list of conditions, and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
21  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 /*
30  * file - find type of a file or files - main program.
31  */
32 
33 #include "file.h"
34 #include "magic.h"
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <unistd.h>
39 #include <string.h>
40 #include <sys/types.h>
41 #include <sys/param.h>	/* for MAXPATHLEN */
42 #include <sys/stat.h>
43 #ifdef RESTORE_TIME
44 # if (__COHERENT__ >= 0x420)
45 #  include <sys/utime.h>
46 # else
47 #  ifdef USE_UTIMES
48 #   include <sys/time.h>
49 #  else
50 #   include <utime.h>
51 #  endif
52 # endif
53 #endif
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>	/* for read() */
56 #endif
57 #ifdef HAVE_LOCALE_H
58 #include <locale.h>
59 #endif
60 #ifdef HAVE_WCHAR_H
61 #include <wchar.h>
62 #endif
63 
64 #ifdef HAVE_GETOPT_H
65 #include <getopt.h>	/* for long options (is this portable?)*/
66 #else
67 #undef HAVE_GETOPT_LONG
68 #endif
69 
70 #include <netinet/in.h>		/* for byte swapping */
71 
72 #include "patchlevel.h"
73 
74 #ifndef	lint
75 FILE_RCSID("@(#)$Id: file.c,v 1.18 2008/05/08 01:40:56 chl Exp $")
76 #endif	/* lint */
77 
78 
79 #ifdef S_IFLNK
80 #define SYMLINKFLAG "Lh"
81 #else
82 #define SYMLINKFLAG ""
83 #endif
84 
85 # define USAGE  "Usage: %s [-bck" SYMLINKFLAG "nNrsvz0] [-e test] [-f namefile] [-F separator] [-m magicfiles] file...\n" \
86 		" 	%s [-m magicfiles] -C\n"
87 
88 #ifndef MAXPATHLEN
89 #define	MAXPATHLEN	512
90 #endif
91 
92 private int 		/* Global command-line options 		*/
93 	bflag = 0,	/* brief output format	 		*/
94 	nopad = 0,	/* Don't pad output			*/
95 	nobuffer = 0,   /* Do not buffer stdout 		*/
96 	nulsep = 0;	/* Append '\0' to the separator		*/
97 
98 private const char *magicfile = 0;	/* where the magic is	*/
99 private const char *default_magicfile = MAGIC;
100 private const char *separator = ":";	/* Default field separator	*/
101 
102 private struct magic_set *magic;
103 extern char *__progname;
104 
105 private void unwrap(char *);
106 private void usage(void);
107 #ifdef HAVE_GETOPT_LONG
108 private void help(void);
109 #endif
110 #if 0
111 private int byteconv4(int, int, int);
112 private short byteconv2(int, int, int);
113 #endif
114 
115 int main(int, char *[]);
116 private void process(const char *, int);
117 private void load(const char *, int);
118 
119 
120 /*
121  * main - parse arguments and handle options
122  */
123 int
124 main(int argc, char *argv[])
125 {
126 	int c, i;
127 	int action = 0, didsomefiles = 0, errflg = 0;
128 	int flags = 0;
129 	char *home, *usermagic;
130 	struct stat sb;
131 	static const char hmagic[] = "/.magic";
132 #define OPTSTRING	"bcCde:f:F:hkLm:nNprsvz0"
133 #ifdef HAVE_GETOPT_LONG
134 	int longindex;
135 	static const struct option long_options[] =
136 	{
137 		{"version", 0, 0, 'v'},
138 		{"help", 0, 0, 0},
139 		{"brief", 0, 0, 'b'},
140 		{"checking-printout", 0, 0, 'c'},
141 		{"debug", 0, 0, 'd'},
142 		{"exclude", 1, 0, 'e' },
143 		{"files-from", 1, 0, 'f'},
144 		{"separator", 1, 0, 'F'},
145 		{"keep-going", 0, 0, 'k'},
146 #ifdef S_IFLNK
147 		{"dereference", 0, 0, 'L'},
148 		{"no-dereference", 0, 0, 'h'},
149 #endif
150 		{"magic-file", 1, 0, 'm'},
151 #if defined(HAVE_UTIME) || defined(HAVE_UTIMES)
152 		{"preserve-date", 0, 0, 'p'},
153 #endif
154 		{"uncompress", 0, 0, 'z'},
155 		{"raw", 0, 0, 'r'},
156 		{"no-buffer", 0, 0, 'n'},
157 		{"no-pad", 0, 0, 'N'},
158 		{"special-files", 0, 0, 's'},
159 		{"compile", 0, 0, 'C'},
160 		{"print0", 0, 0, '0'},
161 		{0, 0, 0, 0},
162 	};
163 #endif
164 
165 	static const struct {
166 		const char *name;
167 		int value;
168 	} nv[] = {
169 		{ "apptype",	MAGIC_NO_CHECK_APPTYPE },
170 		{ "ascii",	MAGIC_NO_CHECK_ASCII },
171 		{ "compress",	MAGIC_NO_CHECK_COMPRESS },
172 		{ "elf",	MAGIC_NO_CHECK_ELF },
173 		{ "fortran",	MAGIC_NO_CHECK_FORTRAN },
174 		{ "soft",	MAGIC_NO_CHECK_SOFT },
175 		{ "tar",	MAGIC_NO_CHECK_TAR },
176 		{ "tokens",	MAGIC_NO_CHECK_TOKENS },
177 		{ "troff",	MAGIC_NO_CHECK_TROFF },
178 	};
179 
180 #ifdef LC_CTYPE
181 	/* makes islower etc work for other langs */
182 	(void)setlocale(LC_CTYPE, "");
183 #endif
184 
185 #ifdef __EMX__
186 	/* sh-like wildcard expansion! Shouldn't hurt at least ... */
187 	_wildcard(&argc, &argv);
188 #endif
189 
190 	magicfile = default_magicfile;
191 	if ((usermagic = getenv("MAGIC")) != NULL)
192 		magicfile = usermagic;
193 	else
194 		if ((home = getenv("HOME")) != NULL) {
195 			size_t len = strlen(home) + sizeof(hmagic);
196 			if ((usermagic = malloc(len)) != NULL) {
197 				(void)strlcpy(usermagic, home, len);
198 				(void)strlcat(usermagic, hmagic, len);
199 				if (stat(usermagic, &sb)<0)
200 					free(usermagic);
201 				else
202 					magicfile = usermagic;
203 			}
204 		}
205 
206 #ifdef S_IFLNK
207 	flags |= getenv("POSIXLY_CORRECT") ? MAGIC_SYMLINK : 0;
208 #endif
209 #ifndef HAVE_GETOPT_LONG
210 	while ((c = getopt(argc, argv, OPTSTRING)) != -1)
211 #else
212 	while ((c = getopt_long(argc, argv, OPTSTRING, long_options,
213 	    &longindex)) != -1)
214 #endif
215 		switch (c) {
216 #ifdef HAVE_GETOPT_LONG
217 		case 0 :
218 			if (longindex == 1)
219 				help();
220 			break;
221 #endif
222 		case '0':
223 			nulsep = 1;
224 			break;
225 		case 'b':
226 			++bflag;
227 			break;
228 		case 'c':
229 			action = FILE_CHECK;
230 			break;
231 		case 'C':
232 			action = FILE_COMPILE;
233 			break;
234 		case 'd':
235 			flags |= MAGIC_DEBUG|MAGIC_CHECK;
236 			break;
237 		case 'e':
238 			for (i = 0; i < sizeof(nv) / sizeof(nv[0]); i++)
239 				if (strcmp(nv[i].name, optarg) == 0)
240 					break;
241 
242 			if (i == sizeof(nv) / sizeof(nv[0]))
243 				errflg++;
244 			else
245 				flags |= nv[i].value;
246 			break;
247 
248 		case 'f':
249 			if(action)
250 				usage();
251 			load(magicfile, flags);
252 			unwrap(optarg);
253 			++didsomefiles;
254 			break;
255 		case 'F':
256 			separator = optarg;
257 			break;
258 		case 'k':
259 			flags |= MAGIC_CONTINUE;
260 			break;
261 		case 'm':
262 			magicfile = optarg;
263 			break;
264 		case 'n':
265 			++nobuffer;
266 			break;
267 		case 'N':
268 			++nopad;
269 			break;
270 #if defined(HAVE_UTIME) || defined(HAVE_UTIMES)
271 		case 'p':
272 			flags |= MAGIC_PRESERVE_ATIME;
273 			break;
274 #endif
275 		case 'r':
276 			flags |= MAGIC_RAW;
277 			break;
278 		case 's':
279 			flags |= MAGIC_DEVICES;
280 			break;
281 		case 'v':
282 			(void)fprintf(stdout, "%s-%d.%.2d\n", __progname,
283 				       FILE_VERSION_MAJOR, patchlevel);
284 			(void)fprintf(stdout, "magic file from %s\n",
285 				       magicfile);
286 			return 1;
287 		case 'z':
288 			flags |= MAGIC_COMPRESS;
289 			break;
290 #ifdef S_IFLNK
291 		case 'L':
292 			flags |= MAGIC_SYMLINK;
293 			break;
294 		case 'h':
295 			flags &= ~MAGIC_SYMLINK;
296 			break;
297 #endif
298 		case '?':
299 		default:
300 			errflg++;
301 			break;
302 		}
303 
304 	if (errflg) {
305 		usage();
306 	}
307 
308 	switch(action) {
309 	case FILE_CHECK:
310 	case FILE_COMPILE:
311 		magic = magic_open(flags|MAGIC_CHECK);
312 		if (magic == NULL) {
313 			(void)fprintf(stderr, "%s: %s\n", __progname,
314 			    strerror(errno));
315 			return 1;
316 		}
317 		c = action == FILE_CHECK ? magic_check(magic, magicfile) :
318 		    magic_compile(magic, magicfile);
319 		if (c == -1) {
320 			(void)fprintf(stderr, "%s: %s\n", __progname,
321 			    magic_error(magic));
322 			return -1;
323 		}
324 		return 0;
325 	default:
326 		load(magicfile, flags);
327 		break;
328 	}
329 
330 	if (optind == argc) {
331 		if (!didsomefiles) {
332 			usage();
333 		}
334 	}
335 	else {
336 		int i, wid, nw;
337 		for (wid = 0, i = optind; i < argc; i++) {
338 			nw = file_mbswidth(argv[i]);
339 			if (nw > wid)
340 				wid = nw;
341 		}
342 		for (; optind < argc; optind++)
343 			process(argv[optind], wid);
344 	}
345 
346 	magic_close(magic);
347 	return 0;
348 }
349 
350 
351 private void
352 /*ARGSUSED*/
353 load(const char *m, int flags)
354 {
355 	if (magic || m == NULL)
356 		return;
357 	magic = magic_open(flags);
358 	if (magic == NULL) {
359 		(void)fprintf(stderr, "%s: %s\n", __progname, strerror(errno));
360 		exit(1);
361 	}
362 	if (magic_load(magic, magicfile) == -1) {
363 		(void)fprintf(stderr, "%s: %s\n",
364 		    __progname, magic_error(magic));
365 		exit(1);
366 	}
367 }
368 
369 /*
370  * unwrap -- read a file of filenames, do each one.
371  */
372 private void
373 unwrap(char *fn)
374 {
375 	char buf[MAXPATHLEN];
376 	FILE *f;
377 	int wid = 0, cwid;
378 
379 	if (strcmp("-", fn) == 0) {
380 		f = stdin;
381 		wid = 1;
382 	} else {
383 		if ((f = fopen(fn, "r")) == NULL) {
384 			(void)fprintf(stderr, "%s: Cannot open `%s' (%s).\n",
385 			    __progname, fn, strerror(errno));
386 			exit(1);
387 		}
388 
389 		while (fgets(buf, sizeof(buf), f) != NULL) {
390 			buf[strcspn(buf, "\n")] = '\0';
391 			cwid = file_mbswidth(buf);
392 			if (cwid > wid)
393 				wid = cwid;
394 		}
395 
396 		rewind(f);
397 	}
398 
399 	while (fgets(buf, sizeof(buf), f) != NULL) {
400 		buf[strcspn(buf, "\n")] = '\0';
401 		process(buf, wid);
402 		if(nobuffer)
403 			(void)fflush(stdout);
404 	}
405 
406 	(void)fclose(f);
407 }
408 
409 /*
410  * Called for each input file on the command line (or in a list of files)
411  */
412 private void
413 process(const char *inname, int wid)
414 {
415 	const char *type;
416 	int std_in = strcmp(inname, "-") == 0;
417 
418 	if (wid > 0 && !bflag) {
419 		(void)printf("%s", std_in ? "/dev/stdin" : inname);
420 		if (nulsep)
421 			(void)putc('\0', stdout);
422 		else
423 			(void)printf("%s", separator);
424 		(void)printf("%*s ",
425 		    (int) (nopad ? 0 : (wid - file_mbswidth(inname))), "");
426 	}
427 
428 	type = magic_file(magic, std_in ? NULL : inname);
429 	if (type == NULL)
430 		(void)printf("ERROR: %s\n", magic_error(magic));
431 	else
432 		(void)printf("%s\n", type);
433 }
434 
435 
#if 0
/*
 * byteconv4
 * Input:
 *	from		4 byte quantity to convert
 *	same		non-zero: return `from' untouched
 *	big_endian	whether we are a big endian host
 */
private int
byteconv4(int from, int same, int big_endian)
{
	union {
		int i;
		char c[4];
	} in, out;
	int k;

	if (same)
		return from;
	if (!big_endian)
		return ntohl(from);	/* msb -> lsb conversion on lsb */

	/* lsb -> msb conversion on msb: reverse the four bytes by hand */
	in.i = from;
	for (k = 0; k < 4; k++)
		out.c[k] = in.c[3 - k];

	return out.i;
}

/*
 * byteconv2
 * Same as byteconv4, but for shorts
 */
private short
byteconv2(int from, int same, int big_endian)
{
	union {
		short s;
		char c[2];
	} in, out;

	if (same)
		return from;
	if (!big_endian)
		return ntohs(from);	/* msb -> lsb conversion on lsb */

	/* lsb -> msb conversion on msb: exchange the two bytes by hand */
	in.s = (short) from;
	out.c[0] = in.c[1];
	out.c[1] = in.c[0];

	return out.s;
}
#endif
492 
/*
 * file_mbswidth - number of display columns taken by the string `s'.
 *
 * With wide-character support the string is decoded with mbrtowc() and
 * the wcwidth() of each character is summed; a newline counts as one
 * column, and characters for which wcwidth() reports no positive width
 * contribute nothing.  If the string cannot be decoded, or when
 * wide-character support is not compiled in, the result is strlen(s).
 */
size_t
file_mbswidth(const char *s)
{
#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
	size_t bytesconsumed, old_n, n, width = 0;
	mbstate_t state;
	wchar_t nextchar;
	int w;

	(void)memset(&state, 0, sizeof(mbstate_t));
	old_n = n = strlen(s);

	while (n > 0) {
		bytesconsumed = mbrtowc(&nextchar, s, n, &state);
		if (bytesconsumed == (size_t)(-1) ||
		    bytesconsumed == (size_t)(-2)) {
			/* Something went wrong, return something reasonable */
			return old_n;
		}
		if (bytesconsumed == 0) {
			/* No progress is possible; avoid spinning forever. */
			break;
		}
		if (s[0] == '\n') {
			/*
			 * do what strlen() would do, so that caller
			 * is always right
			 */
			width++;
		} else {
			/*
			 * wcwidth() is -1 for non-printable characters;
			 * adding that to a size_t would shrink (or wrap)
			 * the running total.
			 */
			w = wcwidth(nextchar);
			if (w > 0)
				width += (size_t)w;
		}

		s += bytesconsumed;
		n -= bytesconsumed;
	}
	return width;
#else
	return strlen(s);
#endif
}
526 
527 private void
528 usage(void)
529 {
530 	(void)fprintf(stderr, USAGE, __progname, __progname);
531 #ifdef HAVE_GETOPT_LONG
532 	(void)fputs("Try `file --help' for more information.\n", stderr);
533 #endif
534 	exit(1);
535 }
536 
#ifdef HAVE_GETOPT_LONG
/*
 * help - print the long option summary on stdout and exit with status 0.
 *
 * The text is kept in step with the option tables in main(): every test
 * name accepted by -e is listed, and -d, -h and -0 are described.
 */
private void
help(void)
{
	(void)puts(
"Usage: file [OPTION]... [FILE]...\n"
"Determine file type of FILEs.\n"
"\n"
"  -m, --magic-file LIST      use LIST as a colon-separated list of magic\n"
"                               number files\n"
"  -z, --uncompress           try to look inside compressed files\n"
"  -b, --brief                do not prepend filenames to output lines\n"
"  -c, --checking-printout    print the parsed form of the magic file, use in\n"
"                               conjunction with -m to debug a new magic file\n"
"                               before installing it\n"
"  -d, --debug                print debugging messages\n"
"  -e, --exclude TEST         exclude TEST from the list of tests to be\n"
"                               performed for file. Valid tests are:\n"
"                               apptype, ascii, compress, elf, fortran,\n"
"                               soft, tar, tokens, troff\n"
"  -f, --files-from FILE      read the filenames to be examined from FILE\n"
"  -F, --separator string     use string as separator instead of `:'\n"
"  -k, --keep-going           don't stop at the first match\n"
"  -L, --dereference          causes symlinks to be followed\n"
"  -h, --no-dereference       don't follow symlinks\n"
"  -n, --no-buffer            do not buffer output\n"
"  -N, --no-pad               do not pad output\n"
"  -0, --print0               output a NUL after the filename instead of the\n"
"                               separator\n"
"  -p, --preserve-date        preserve access times on files\n"
"  -r, --raw                  don't translate unprintable chars to \\ooo\n"
"  -s, --special-files        treat special (block/char devices) files as\n"
"                             ordinary ones\n"
"or\n"
"      --help                 display this help and exit\n"
"or\n"
"      --version              output version information and exit\n"
"or\n"
"  -C, --compile              compile file specified by -m\n"
);
	exit(0);
}
#endif
575