xref: /netbsd-src/external/bsd/file/dist/src/magic.c (revision e89934bbf778a6d6d6894877c4da59d0c7835b0f)
1 /*	$NetBSD: magic.c,v 1.11 2017/02/10 17:53:24 christos Exp $	*/
2 
3 /*
4  * Copyright (c) Christos Zoulas 2003.
5  * All Rights Reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice immediately at the beginning of the file, without modification,
12  *    this list of conditions, and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
21  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #ifdef WIN32
31 #include <windows.h>
32 #include <shlwapi.h>
33 #endif
34 
35 #include "file.h"
36 
37 #ifndef	lint
38 #if 0
39 FILE_RCSID("@(#)$File: magic.c,v 1.100 2016/07/18 11:43:05 christos Exp $")
40 #else
41 __RCSID("$NetBSD: magic.c,v 1.11 2017/02/10 17:53:24 christos Exp $");
42 #endif
43 #endif	/* lint */
44 
45 #include "magic.h"
46 
47 #include <stdlib.h>
48 #include <unistd.h>
49 #include <string.h>
50 #ifdef QUICK
51 #include <sys/mman.h>
52 #endif
53 #ifdef HAVE_LIMITS_H
54 #include <limits.h>	/* for PIPE_BUF */
55 #endif
56 
57 #if defined(HAVE_UTIMES)
58 # include <sys/time.h>
59 #elif defined(HAVE_UTIME)
60 # if defined(HAVE_SYS_UTIME_H)
61 #  include <sys/utime.h>
62 # elif defined(HAVE_UTIME_H)
63 #  include <utime.h>
64 # endif
65 #endif
66 
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>	/* for read() */
69 #endif
70 
71 #ifndef PIPE_BUF
72 /* Get the PIPE_BUF from pathconf */
73 #ifdef _PC_PIPE_BUF
74 #define PIPE_BUF pathconf(".", _PC_PIPE_BUF)
75 #else
76 #define PIPE_BUF 512
77 #endif
78 #endif
79 
80 private void close_and_restore(const struct magic_set *, const char *, int,
81     const struct stat *);
82 private int unreadable_info(struct magic_set *, mode_t, const char *);
83 private const char* get_default_magic(void);
84 #ifndef COMPILE_ONLY
85 private const char *file_or_fd(struct magic_set *, const char *, int);
86 #endif
87 
88 #ifndef	STDIN_FILENO
89 #define	STDIN_FILENO	0
90 #endif
91 
92 #ifdef WIN32
93 /* HINSTANCE of this shared library. Needed for get_default_magic() */
94 static HINSTANCE _w32_dll_instance = NULL;
95 
96 static void
97 _w32_append_path(char **hmagicpath, const char *fmt, ...)
98 {
99 	char *tmppath;
100         char *newpath;
101 	va_list ap;
102 
103 	va_start(ap, fmt);
104 	if (vasprintf(&tmppath, fmt, ap) < 0) {
105 		va_end(ap);
106 		return;
107 	}
108 	va_end(ap);
109 
110 	if (access(tmppath, R_OK) == -1)
111 		goto out;
112 
113 	if (*hmagicpath == NULL) {
114 		*hmagicpath = tmppath;
115 		return;
116 	}
117 
118 	if (asprintf(&newpath, "%s%c%s", *hmagicpath, PATHSEP, tmppath) < 0)
119 		goto out;
120 
121 	free(*hmagicpath);
122 	free(tmppath);
123 	*hmagicpath = newpath;
124 	return;
125 out:
126 	free(tmppath);
127 }
128 
129 static void
130 _w32_get_magic_relative_to(char **hmagicpath, HINSTANCE module)
131 {
132 	static const char *trypaths[] = {
133 		"%s/share/misc/magic.mgc",
134 		"%s/magic.mgc",
135 	};
136 	LPSTR dllpath;
137 	size_t sp;
138 
139 	dllpath = calloc(MAX_PATH + 1, sizeof(*dllpath));
140 
141 	if (!GetModuleFileNameA(module, dllpath, MAX_PATH))
142 		goto out;
143 
144 	PathRemoveFileSpecA(dllpath);
145 
146 	if (module) {
147 		char exepath[MAX_PATH];
148 		GetModuleFileNameA(NULL, exepath, MAX_PATH);
149 		PathRemoveFileSpecA(exepath);
150 		if (stricmp(exepath, dllpath) == 0)
151 			goto out;
152 	}
153 
154 	sp = strlen(dllpath);
155 	if (sp > 3 && stricmp(&dllpath[sp - 3], "bin") == 0) {
156 		_w32_append_path(hmagicpath,
157 		    "%s/../share/misc/magic.mgc", dllpath);
158 		goto out;
159 	}
160 
161 	for (sp = 0; sp < __arraycount(trypaths); sp++)
162 		_w32_append_path(hmagicpath, trypaths[sp], dllpath);
163 out:
164 	free(dllpath);
165 }
166 
167 /* Placate GCC by offering a sacrificial previous prototype */
168 BOOL WINAPI DllMain(HINSTANCE, DWORD, LPVOID);
169 
170 BOOL WINAPI
171 DllMain(HINSTANCE hinstDLL, DWORD fdwReason,
172     LPVOID lpvReserved __attribute__((__unused__)))
173 {
174 	if (fdwReason == DLL_PROCESS_ATTACH)
175 		_w32_dll_instance = hinstDLL;
176 	return TRUE;
177 }
178 #endif
179 
180 private const char *
181 get_default_magic(void)
182 {
183 	static const char hmagic[] = "/.magic/magic.mgc";
184 	static char *default_magic;
185 	char *home, *hmagicpath;
186 
187 #ifndef WIN32
188 	struct stat st;
189 
190 	if (default_magic) {
191 		free(default_magic);
192 		default_magic = NULL;
193 	}
194 	if ((home = getenv("HOME")) == NULL)
195 		return MAGIC;
196 
197 	if (asprintf(&hmagicpath, "%s/.magic.mgc", home) < 0)
198 		return MAGIC;
199 	if (stat(hmagicpath, &st) == -1) {
200 		free(hmagicpath);
201 		if (asprintf(&hmagicpath, "%s/.magic", home) < 0)
202 			return MAGIC;
203 		if (stat(hmagicpath, &st) == -1)
204 			goto out;
205 		if (S_ISDIR(st.st_mode)) {
206 			free(hmagicpath);
207 			if (asprintf(&hmagicpath, "%s/%s", home, hmagic) < 0)
208 				return MAGIC;
209 			if (access(hmagicpath, R_OK) == -1)
210 				goto out;
211 		}
212 	}
213 
214 	if (asprintf(&default_magic, "%s:%s", hmagicpath, MAGIC) < 0)
215 		goto out;
216 	free(hmagicpath);
217 	return default_magic;
218 out:
219 	default_magic = NULL;
220 	free(hmagicpath);
221 	return MAGIC;
222 #else
223 	hmagicpath = NULL;
224 
225 	if (default_magic) {
226 		free(default_magic);
227 		default_magic = NULL;
228 	}
229 
230 	/* First, try to get a magic file from user-application data */
231 	if ((home = getenv("LOCALAPPDATA")) != NULL)
232 		_w32_append_path(&hmagicpath, "%s%s", home, hmagic);
233 
234 	/* Second, try to get a magic file from the user profile data */
235 	if ((home = getenv("USERPROFILE")) != NULL)
236 		_w32_append_path(&hmagicpath,
237 		    "%s/Local Settings/Application Data%s", home, hmagic);
238 
239 	/* Third, try to get a magic file from Common Files */
240 	if ((home = getenv("COMMONPROGRAMFILES")) != NULL)
241 		_w32_append_path(&hmagicpath, "%s%s", home, hmagic);
242 
243 	/* Fourth, try to get magic file relative to exe location */
244         _w32_get_magic_relative_to(&hmagicpath, NULL);
245 
246 	/* Fifth, try to get magic file relative to dll location */
247         _w32_get_magic_relative_to(&hmagicpath, _w32_dll_instance);
248 
249 	/* Avoid MAGIC constant - it likely points to a file within MSys tree */
250 	default_magic = hmagicpath;
251 	return default_magic;
252 #endif
253 }
254 
255 public const char *
256 magic_getpath(const char *magicfile, int action)
257 {
258 	if (magicfile != NULL)
259 		return magicfile;
260 
261 	magicfile = getenv("MAGIC");
262 	if (magicfile != NULL)
263 		return magicfile;
264 
265 	return action == FILE_LOAD ? get_default_magic() : MAGIC;
266 }
267 
268 public struct magic_set *
269 magic_open(int flags)
270 {
271 	return file_ms_alloc(flags);
272 }
273 
274 private int
275 unreadable_info(struct magic_set *ms, mode_t md, const char *file)
276 {
277 	if (file) {
278 		/* We cannot open it, but we were able to stat it. */
279 		if (access(file, W_OK) == 0)
280 			if (file_printf(ms, "writable, ") == -1)
281 				return -1;
282 		if (access(file, X_OK) == 0)
283 			if (file_printf(ms, "executable, ") == -1)
284 				return -1;
285 	}
286 	if (S_ISREG(md))
287 		if (file_printf(ms, "regular file, ") == -1)
288 			return -1;
289 	if (file_printf(ms, "no read permission") == -1)
290 		return -1;
291 	return 0;
292 }
293 
294 public void
295 magic_close(struct magic_set *ms)
296 {
297 	if (ms == NULL)
298 		return;
299 	file_ms_free(ms);
300 }
301 
302 /*
303  * load a magic file
304  */
305 public int
306 magic_load(struct magic_set *ms, const char *magicfile)
307 {
308 	if (ms == NULL)
309 		return -1;
310 	return file_apprentice(ms, magicfile, FILE_LOAD);
311 }
312 
313 #ifndef COMPILE_ONLY
314 /*
315  * Install a set of compiled magic buffers.
316  */
317 public int
318 magic_load_buffers(struct magic_set *ms, void **bufs, size_t *sizes,
319     size_t nbufs)
320 {
321 	if (ms == NULL)
322 		return -1;
323 	return buffer_apprentice(ms, (struct magic **)bufs, sizes, nbufs);
324 }
325 #endif
326 
327 public int
328 magic_compile(struct magic_set *ms, const char *magicfile)
329 {
330 	if (ms == NULL)
331 		return -1;
332 	return file_apprentice(ms, magicfile, FILE_COMPILE);
333 }
334 
335 public int
336 magic_check(struct magic_set *ms, const char *magicfile)
337 {
338 	if (ms == NULL)
339 		return -1;
340 	return file_apprentice(ms, magicfile, FILE_CHECK);
341 }
342 
343 public int
344 magic_list(struct magic_set *ms, const char *magicfile)
345 {
346 	if (ms == NULL)
347 		return -1;
348 	return file_apprentice(ms, magicfile, FILE_LIST);
349 }
350 
351 private void
352 close_and_restore(const struct magic_set *ms, const char *name, int fd,
353     const struct stat *sb)
354 {
355 	if (fd == STDIN_FILENO || name == NULL)
356 		return;
357 	(void) close(fd);
358 
359 	if ((ms->flags & MAGIC_PRESERVE_ATIME) != 0) {
360 		/*
361 		 * Try to restore access, modification times if read it.
362 		 * This is really *bad* because it will modify the status
363 		 * time of the file... And of course this will affect
364 		 * backup programs
365 		 */
366 #ifdef HAVE_UTIMES
367 		struct timeval  utsbuf[2];
368 		(void)memset(utsbuf, 0, sizeof(utsbuf));
369 		utsbuf[0].tv_sec = sb->st_atime;
370 		utsbuf[1].tv_sec = sb->st_mtime;
371 
372 		(void) utimes(name, utsbuf); /* don't care if loses */
373 #elif defined(HAVE_UTIME_H) || defined(HAVE_SYS_UTIME_H)
374 		struct utimbuf  utbuf;
375 
376 		(void)memset(&utbuf, 0, sizeof(utbuf));
377 		utbuf.actime = sb->st_atime;
378 		utbuf.modtime = sb->st_mtime;
379 		(void) utime(name, &utbuf); /* don't care if loses */
380 #endif
381 	}
382 }
383 
384 #ifndef COMPILE_ONLY
385 
386 /*
387  * find type of descriptor
388  */
389 public const char *
390 magic_descriptor(struct magic_set *ms, int fd)
391 {
392 	if (ms == NULL)
393 		return NULL;
394 	return file_or_fd(ms, NULL, fd);
395 }
396 
397 /*
398  * find type of named file
399  */
400 public const char *
401 magic_file(struct magic_set *ms, const char *inname)
402 {
403 	if (ms == NULL)
404 		return NULL;
405 	return file_or_fd(ms, inname, STDIN_FILENO);
406 }
407 
/*
 * Identify either the named file `inname' or, when `inname' is NULL, the
 * data readable from the already-open descriptor `fd'.
 *
 * Steps: reset the magic set, let file_fsmagic() try to classify the
 * object first, then read up to ms->bytes_max bytes and hand them to
 * file_buffer().
 *
 * Returns file_getbuffer(ms) when rv ends up 0: fsmagic already printed a
 * type, the file could not be opened or read (in which case an
 * "unreadable" description may have been emitted), or file_buffer()
 * succeeded.  Returns NULL when file_reset(), the buffer allocation,
 * file_fsmagic(), read(), unreadable_info() or file_buffer() fails.
 */
private const char *
file_or_fd(struct magic_set *ms, const char *inname, int fd)
{
	int	rv = -1;	/* stays -1 until a result is available */
	unsigned char *buf;
	struct stat	sb;
	ssize_t nbytes = 0;	/* number of bytes read from a datafile */
	int	ispipe = 0;	/* set when the input is a FIFO */
	off_t	pos = (off_t)-1;	/* caller's offset to restore, if known */

	if (file_reset(ms) == -1)
		goto out;

	/*
	 * one extra for terminating '\0', and
	 * some overlapping space for matches near EOF
	 */
#define SLOP (1 + sizeof(union VALUETYPE))
	if ((buf = CAST(unsigned char *, malloc(ms->bytes_max + SLOP))) == NULL)
		return NULL;

	switch (file_fsmagic(ms, inname, &sb)) {
	case -1:		/* error */
		goto done;
	case 0:			/* nothing found */
		break;
	default:		/* matched it and printed type */
		rv = 0;
		goto done;
	}

#ifdef WIN32
	/* Place stdin in binary mode, so EOF (Ctrl+Z) doesn't stop early. */
	if (fd == STDIN_FILENO)
		_setmode(STDIN_FILENO, O_BINARY);
#endif

	if (inname == NULL) {
		/*
		 * Caller-supplied descriptor: note whether it is a FIFO;
		 * otherwise remember the current offset so that it can be
		 * put back at `done'.
		 */
		if (fstat(fd, &sb) == 0 && S_ISFIFO(sb.st_mode))
			ispipe = 1;
		else
			pos = lseek(fd, (off_t)0, SEEK_CUR);
	} else {
		int flags = O_RDONLY|O_BINARY;
		int okstat = stat(inname, &sb) == 0;

		/* Open FIFOs non-blocking (where O_NONBLOCK exists). */
		if (okstat && S_ISFIFO(sb.st_mode)) {
#ifdef O_NONBLOCK
			flags |= O_NONBLOCK;
#endif
			ispipe = 1;
		}

		errno = 0;
		/* From here on `fd' is our own descriptor, or -1 on failure. */
		if ((fd = open(inname, flags)) < 0) {
#ifdef WIN32
			/*
			 * Can't stat, can't open.  It may have been opened in
			 * fsmagic, so if the user doesn't have read permission,
			 * allow it to say so; otherwise an error was probably
			 * displayed in fsmagic.
			 */
			if (!okstat && errno == EACCES) {
				sb.st_mode = S_IFBLK;
				okstat = 1;
			}
#endif
			/*
			 * An unopenable file is not an error for the caller:
			 * describe it if it could be stat'ed and return
			 * whatever text has been produced.
			 */
			if (okstat &&
			    unreadable_info(ms, sb.st_mode, inname) == -1)
				goto done;
			rv = 0;
			goto done;
		}
#ifdef O_NONBLOCK
		/* Clear O_NONBLOCK again now that the open has completed. */
		if ((flags = fcntl(fd, F_GETFL)) != -1) {
			flags &= ~O_NONBLOCK;
			(void)fcntl(fd, F_SETFL, flags);
		}
#endif
	}

	/*
	 * try looking at the first ms->bytes_max bytes
	 */
	if (ispipe) {
		ssize_t r = 0;

		/*
		 * Accumulate sread() results until it returns <= 0 or a
		 * chunk shorter than PIPE_BUF arrives.
		 * NOTE(review): the meaning of sread()'s last argument is
		 * not visible here - confirm against its definition.
		 */
		while ((r = sread(fd, (void *)&buf[nbytes],
		    (size_t)(ms->bytes_max - nbytes), 1)) > 0) {
			nbytes += r;
			if (r < PIPE_BUF) break;
		}

		if (nbytes == 0 && inname) {
			/* We can not read it, but we were able to stat it. */
			if (unreadable_info(ms, sb.st_mode, inname) == -1)
				goto done;
			rv = 0;
			goto done;
		}

	} else {
		/* Windows refuses to read from a big console buffer. */
		size_t howmany =
#if defined(WIN32)
				_isatty(fd) ? 8 * 1024 :
#endif
				ms->bytes_max;
		if ((nbytes = read(fd, (char *)buf, howmany)) == -1) {
			/* Name the input as precisely as we can in the error. */
			if (inname == NULL && fd != STDIN_FILENO)
				file_error(ms, errno, "cannot read fd %d", fd);
			else
				file_error(ms, errno, "cannot read `%s'",
				    inname == NULL ? "/dev/stdin" : inname);
			goto done;
		}
	}

	(void)memset(buf + nbytes, 0, SLOP); /* NUL terminate */
	if (file_buffer(ms, fd, inname, buf, (size_t)nbytes) == -1)
		goto done;
	rv = 0;
done:
	free(buf);
	if (fd != -1) {
		/* Put the caller's descriptor back where we found it. */
		if (pos != (off_t)-1)
			(void)lseek(fd, pos, SEEK_SET);
		/* A no-op for stdin and for descriptors without a name. */
		close_and_restore(ms, inname, fd, &sb);
	}
out:
	return rv == 0 ? file_getbuffer(ms) : NULL;
}
540 
541 
542 public const char *
543 magic_buffer(struct magic_set *ms, const void *buf, size_t nb)
544 {
545 	if (ms == NULL)
546 		return NULL;
547 	if (file_reset(ms) == -1)
548 		return NULL;
549 	/*
550 	 * The main work is done here!
551 	 * We have the file name and/or the data buffer to be identified.
552 	 */
553 	if (file_buffer(ms, -1, NULL, buf, nb) == -1) {
554 		return NULL;
555 	}
556 	return file_getbuffer(ms);
557 }
558 #endif
559 
560 public const char *
561 magic_error(struct magic_set *ms)
562 {
563 	if (ms == NULL)
564 		return "Magic database is not open";
565 	return (ms->event_flags & EVENT_HAD_ERR) ? ms->o.buf : NULL;
566 }
567 
568 public int
569 magic_errno(struct magic_set *ms)
570 {
571 	if (ms == NULL)
572 		return EINVAL;
573 	return (ms->event_flags & EVENT_HAD_ERR) ? ms->error : 0;
574 }
575 
576 public int
577 magic_setflags(struct magic_set *ms, int flags)
578 {
579 	if (ms == NULL)
580 		return -1;
581 #if !defined(HAVE_UTIME) && !defined(HAVE_UTIMES)
582 	if (flags & MAGIC_PRESERVE_ATIME)
583 		return -1;
584 #endif
585 	ms->flags = flags;
586 	return 0;
587 }
588 
589 public int
590 magic_version(void)
591 {
592 	return MAGIC_VERSION;
593 }
594 
595 public int
596 magic_setparam(struct magic_set *ms, int param, const void *val)
597 {
598 	switch (param) {
599 	case MAGIC_PARAM_INDIR_MAX:
600 		ms->indir_max = (uint16_t)*(const size_t *)val;
601 		return 0;
602 	case MAGIC_PARAM_NAME_MAX:
603 		ms->name_max = (uint16_t)*(const size_t *)val;
604 		return 0;
605 	case MAGIC_PARAM_ELF_PHNUM_MAX:
606 		ms->elf_phnum_max = (uint16_t)*(const size_t *)val;
607 		return 0;
608 	case MAGIC_PARAM_ELF_SHNUM_MAX:
609 		ms->elf_shnum_max = (uint16_t)*(const size_t *)val;
610 		return 0;
611 	case MAGIC_PARAM_ELF_NOTES_MAX:
612 		ms->elf_notes_max = (uint16_t)*(const size_t *)val;
613 		return 0;
614 	case MAGIC_PARAM_REGEX_MAX:
615 		ms->elf_notes_max = (uint16_t)*(const size_t *)val;
616 		return 0;
617 	case MAGIC_PARAM_BYTES_MAX:
618 		ms->bytes_max = *(const size_t *)val;
619 		return 0;
620 	default:
621 		errno = EINVAL;
622 		return -1;
623 	}
624 }
625 
626 public int
627 magic_getparam(struct magic_set *ms, int param, void *val)
628 {
629 	switch (param) {
630 	case MAGIC_PARAM_INDIR_MAX:
631 		*(size_t *)val = ms->indir_max;
632 		return 0;
633 	case MAGIC_PARAM_NAME_MAX:
634 		*(size_t *)val = ms->name_max;
635 		return 0;
636 	case MAGIC_PARAM_ELF_PHNUM_MAX:
637 		*(size_t *)val = ms->elf_phnum_max;
638 		return 0;
639 	case MAGIC_PARAM_ELF_SHNUM_MAX:
640 		*(size_t *)val = ms->elf_shnum_max;
641 		return 0;
642 	case MAGIC_PARAM_ELF_NOTES_MAX:
643 		*(size_t *)val = ms->elf_notes_max;
644 		return 0;
645 	case MAGIC_PARAM_REGEX_MAX:
646 		*(size_t *)val = ms->regex_max;
647 		return 0;
648 	case MAGIC_PARAM_BYTES_MAX:
649 		*(size_t *)val = ms->bytes_max;
650 		return 0;
651 	default:
652 		errno = EINVAL;
653 		return -1;
654 	}
655 }
656