xref: /netbsd-src/external/bsd/file/dist/src/compress.c (revision 4724848cf0da353df257f730694b7882798e5daf)
1 /*	$NetBSD: compress.c,v 1.22 2022/09/24 20:21:46 christos Exp $	*/
2 
3 /*
4  * Copyright (c) Ian F. Darwin 1986-1995.
5  * Software written by Ian F. Darwin and others;
6  * maintained 1995-present by Christos Zoulas and others.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice immediately at the beginning of the file, without modification,
13  *    this list of conditions, and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
22  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if not recognized, uncompresses and prints
 *		   information if recognized
 *	uncompressbuf(fd, bytes_max, method, nofork, old, newch, n) -
 *		   uncompress old into *newch using method; returns
 *		   OKDATA, NODATA or ERRDATA and stores the new size in *n
 */
37 #include "file.h"
38 
39 #ifndef lint
40 #if 0
41 FILE_RCSID("@(#)$File: compress.c,v 1.147 2022/09/21 13:55:46 christos Exp $")
42 #else
43 __RCSID("$NetBSD: compress.c,v 1.22 2022/09/24 20:21:46 christos Exp $");
44 #endif
45 #endif
46 
47 #include "magic.h"
48 #include <stdlib.h>
49 #ifdef HAVE_UNISTD_H
50 #include <unistd.h>
51 #endif
52 #ifdef HAVE_SPAWN_H
53 #include <spawn.h>
54 #endif
55 #include <string.h>
56 #include <errno.h>
57 #include <ctype.h>
58 #include <stdarg.h>
59 #include <signal.h>
60 #ifndef HAVE_SIG_T
61 typedef void (*sig_t)(int);
62 #endif /* HAVE_SIG_T */
63 #ifdef HAVE_SYS_IOCTL_H
64 #include <sys/ioctl.h>
65 #endif
66 #ifdef HAVE_SYS_WAIT_H
67 #include <sys/wait.h>
68 #endif
69 #if defined(HAVE_SYS_TIME_H)
70 #include <sys/time.h>
71 #endif
72 
73 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
74 #define BUILTIN_DECOMPRESS
75 #include <zlib.h>
76 #endif
77 
78 #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
79 #define BUILTIN_BZLIB
80 #include <bzlib.h>
81 #endif
82 
83 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
84 #define BUILTIN_XZLIB
85 #include <lzma.h>
86 #endif
87 
88 #if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
89 #define BUILTIN_ZSTDLIB
90 #include <zstd.h>
91 #include <zstd_errors.h>
92 #endif
93 
94 #if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
95 #define BUILTIN_LZLIB
96 #include <lzlib.h>
97 #endif
98 
#ifdef DEBUG
/*
 * Debug tracing.  DPRINTF() takes printf-style arguments and writes them to
 * /dev/tty, which is opened on first use and cached in `tty'.  If the
 * terminal cannot be opened the process aborts.  Without DEBUG the macro
 * expands to nothing, so its arguments are not evaluated.
 */
int tty = -1;
#define DPRINTF(...)	do { \
	if (tty == -1) \
		tty = open("/dev/tty", O_RDWR); \
	if (tty == -1) \
		abort(); \
	dprintf(tty, __VA_ARGS__); \
} while (/*CONSTCOND*/0)
#else
#define DPRINTF(...)
#endif
111 
112 #ifdef ZLIBSUPPORT
/*
 * External fallback for raw zlib streams: a python one-liner that reads
 * stdin, inflates it and writes the result to stdout.
 *
 * The following python code is not really used because ZLIBSUPPORT is only
 * defined if we have a built-in zlib, and the built-in zlib handles that.
 * That is not true for android where we have zlib.h and not -lz.
 */
static const char zlibcode[] =
    "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";

/* NULL-terminated argv that runs zlibcode; used by the zlib compr[] entry. */
static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
122 
/*
 * Decide whether buf starts with a plausible zlib (RFC 1950) stream header.
 *
 * buf must have at least two readable bytes: CMF followed by FLG.
 *
 * Returns 1 when the low nibble of CMF is 8, the top bit of CMF is clear
 * and CMF * 256 + FLG is a multiple of 31; returns 0 otherwise.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int header;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;

	/*
	 * The check value is defined over CMF * 256 + FLG.  That is plain
	 * arithmetic on the two bytes, so the result must not depend on the
	 * byte order of the machine we are running on.
	 */
	header = buf[0] * 256u + buf[1];
	if (header % 31 != 0)
		return 0;
	return 1;
}
139 #endif
140 
/*
 * Heuristic match for the compr[] "lzma" entry.
 *
 * buf must have at least 13 readable bytes.  The data matches when the
 * first three bytes are 0x5d, 0, 0 and byte 12 is either 0 or 0xff.
 * Byte 12 is only inspected once the first three bytes have matched.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	return buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0 &&
	    (buf[12] == 0 || buf[12] == 0xff);
}
150 
/* Command-line flags handed to the external decompressor programs. */
#define gzip_flags "-cd"
#define lrzip_flags "-do"
#define lzip_flags gzip_flags

/*
 * NULL-terminated argument vectors for the external decompressors.  They are
 * referenced from the compr[] table; element 0 is the program name that
 * uncompressbuf() passes to posix_spawnp()/execvp().
 */
static const char *gzip_args[] = {
	"gzip", gzip_flags, NULL
};
static const char *uncompress_args[] = {
	"uncompress", "-c", NULL
};
static const char *bzip2_args[] = {
	"bzip2", "-cd", NULL
};
static const char *lzip_args[] = {
	"lzip", lzip_flags, NULL
};
static const char *xz_args[] = {
	"xz", "-cd", NULL
};
static const char *lrzip_args[] = {
	"lrzip", lrzip_flags, NULL
};
static const char *lz4_args[] = {
	"lz4", "-cd", NULL
};
static const char *zstd_args[] = {
	"zstd", "-cd", NULL
};
179 
/* Placeholders stored in the `unused' member of the table below. */
#define	do_zlib		NULL
#define	do_bzlib	NULL

/*
 * Table of recognised compression formats, tried in order by file_zmagic().
 *
 *   u.magic  byte string compared against the start of the data when
 *            maglen is non-negative
 *   u.func   predicate called on the data when maglen is negative
 *   maglen   number of magic bytes; when negative, its absolute value is
 *            the minimum amount of data required before u.func is called
 *   argv     external command that decompresses stdin to stdout
 *   unused   not referenced by the code in this file
 *
 * The METH_* constants are indices into this table, used by methodname()
 * and getdecompressor() to select a built-in decompressor.
 * NOTE(review): METH_FROZEN is 2, which is the entry commented "gzipped",
 * not the one commented "frozen" -- the name looks historical; confirm
 * before renaming.
 */
private const struct {
	union {
		const char *magic;
		int (*func)(const unsigned char *);
	} u;
	int maglen;
	const char **argv;
	void *unused;
} compr[] = {
#define METH_FROZEN	2
#define METH_BZIP	7
#define METH_XZ		9
#define METH_LZIP	8
#define METH_ZSTD	12
#define METH_LZMA	13
#define METH_ZLIB	14
    { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
    /* Uncompress can get stuck; so use gzip first if we have it
     * Idea from Damien Clark, thanks! */
    { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
    { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
    { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
    { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
    /* the standard pack utilities do not accept standard input */
    { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
    { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkzipped */
    /* ...only first file examined */
    { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
    { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
    { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
    { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
    { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
    { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
    { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
#ifdef ZLIBSUPPORT
    { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
#endif
};
221 
/*
 * Status codes returned by uncompressbuf() and the built-in decompressors:
 *   OKDATA   the output buffer holds decompressed data
 *   NODATA   nothing usable was produced (makeerror() returns this when it
 *            cannot even format an error message)
 *   ERRDATA  the output buffer holds a NUL-terminated error message
 */
#define OKDATA 	0
#define NODATA	1
#define ERRDATA	2

private ssize_t swrite(int, const void *, size_t);
#if HAVE_FORK
/* Number of entries in compr[]. */
private size_t ncompr = __arraycount(compr);
private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
    unsigned char **, size_t *);
/*
 * Built-in decompressors.  They all share one signature:
 * (input, in/out buffer pointer, bytes_max, in/out length, extra flag).
 */
#ifdef BUILTIN_DECOMPRESS
private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_BZLIB
private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_XZLIB
private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_ZSTDLIB
private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_LZLIB
private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif

/* printf-style: argument 3 is the format, checked against arguments 4+. */
static int makeerror(unsigned char **, size_t *, const char *, ...)
    __attribute__((__format__(__printf__, 3, 4)));
private const char *methodname(size_t);
257 
258 private int
259 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
260 {
261 	unsigned char *p;
262 	int mime = ms->flags & MAGIC_MIME;
263 
264 	if (!mime)
265 		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
266 
267 	for (p = buf; *p; p++)
268 		if (!isalnum(*p))
269 			*p = '-';
270 
271 	return file_printf(ms, "application/x-decompression-error-%s-%s",
272 	    methodname(i), buf);
273 }
274 
275 protected int
276 file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
277 {
278 	unsigned char *newbuf = NULL;
279 	size_t i, nsz;
280 	char *rbuf;
281 	file_pushbuf_t *pb;
282 	int urv, prv, rv = 0;
283 	int mime = ms->flags & MAGIC_MIME;
284 	int fd = b->fd;
285 	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
286 	size_t nbytes = b->flen;
287 	int sa_saved = 0;
288 	struct sigaction sig_act;
289 
290 	if ((ms->flags & MAGIC_COMPRESS) == 0)
291 		return 0;
292 
293 	for (i = 0; i < ncompr; i++) {
294 		int zm;
295 		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
296 			continue;
297 		if (compr[i].maglen < 0) {
298 			zm = (*compr[i].u.func)(buf);
299 		} else {
300 			zm = memcmp(buf, compr[i].u.magic,
301 			    CAST(size_t, compr[i].maglen)) == 0;
302 		}
303 
304 		if (!zm)
305 			continue;
306 
307 		/* Prevent SIGPIPE death if child dies unexpectedly */
308 		if (!sa_saved) {
309 			//We can use sig_act for both new and old, but
310 			struct sigaction new_act;
311 			memset(&new_act, 0, sizeof(new_act));
312 			new_act.sa_handler = SIG_IGN;
313 			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
314 		}
315 
316 		nsz = nbytes;
317 		urv = uncompressbuf(fd, ms->bytes_max, i,
318 		    (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
319 		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
320 		    (char *)newbuf, nsz);
321 		switch (urv) {
322 		case OKDATA:
323 		case ERRDATA:
324 			ms->flags &= ~MAGIC_COMPRESS;
325 			if (urv == ERRDATA)
326 				prv = format_decompression_error(ms, i, newbuf);
327 			else
328 				prv = file_buffer(ms, -1, NULL, name, newbuf,
329 				    nsz);
330 			if (prv == -1)
331 				goto error;
332 			rv = 1;
333 			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
334 				goto out;
335 			if (mime != MAGIC_MIME && mime != 0)
336 				goto out;
337 			if ((file_printf(ms,
338 			    mime ? " compressed-encoding=" : " (")) == -1)
339 				goto error;
340 			if ((pb = file_push_buffer(ms)) == NULL)
341 				goto error;
342 			/*
343 			 * XXX: If file_buffer fails here, we overwrite
344 			 * the compressed text. FIXME.
345 			 */
346 			if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1)
347 			{
348 				if (file_pop_buffer(ms, pb) != NULL)
349 					abort();
350 				goto error;
351 			}
352 			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
353 				if (file_printf(ms, "%s", rbuf) == -1) {
354 					free(rbuf);
355 					goto error;
356 				}
357 				free(rbuf);
358 			}
359 			if (!mime && file_printf(ms, ")") == -1)
360 				goto error;
361 			/*FALLTHROUGH*/
362 		case NODATA:
363 			break;
364 		default:
365 			abort();
366 			/*NOTREACHED*/
367 		error:
368 			rv = -1;
369 			break;
370 		}
371 	}
372 out:
373 	DPRINTF("rv = %d\n", rv);
374 
375 	if (sa_saved && sig_act.sa_handler != SIG_IGN)
376 		(void)sigaction(SIGPIPE, &sig_act, NULL);
377 
378 	free(newbuf);
379 	ms->flags |= MAGIC_COMPRESS;
380 	DPRINTF("Zmagic returns %d\n", rv);
381 	return rv;
382 }
383 #endif
384 /*
385  * `safe' write for sockets and pipes.
386  */
387 private ssize_t
388 swrite(int fd, const void *buf, size_t n)
389 {
390 	ssize_t rv;
391 	size_t rn = n;
392 
393 	do
394 		switch (rv = write(fd, buf, n)) {
395 		case -1:
396 			if (errno == EINTR)
397 				continue;
398 			return -1;
399 		default:
400 			n -= rv;
401 			buf = CAST(const char *, buf) + rv;
402 			break;
403 		}
404 	while (n > 0);
405 	return rn;
406 }
407 
408 
409 /*
410  * `safe' read for sockets and pipes.
411  */
412 protected ssize_t
413 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
414 {
415 	ssize_t rv;
416 #ifdef FIONREAD
417 	int t = 0;
418 #endif
419 	size_t rn = n;
420 
421 	if (fd == STDIN_FILENO)
422 		goto nocheck;
423 
424 #ifdef FIONREAD
425 	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
426 #ifdef FD_ZERO
427 		ssize_t cnt;
428 		for (cnt = 0;; cnt++) {
429 			fd_set check;
430 			struct timeval tout = {0, 100 * 1000};
431 			int selrv;
432 
433 			FD_ZERO(&check);
434 			FD_SET(fd, &check);
435 
436 			/*
437 			 * Avoid soft deadlock: do not read if there
438 			 * is nothing to read from sockets and pipes.
439 			 */
440 			selrv = select(fd + 1, &check, NULL, NULL, &tout);
441 			if (selrv == -1) {
442 				if (errno == EINTR || errno == EAGAIN)
443 					continue;
444 			} else if (selrv == 0 && cnt >= 5) {
445 				return 0;
446 			} else
447 				break;
448 		}
449 #endif
450 		(void)ioctl(fd, FIONREAD, &t);
451 	}
452 
453 	if (t > 0 && CAST(size_t, t) < n) {
454 		n = t;
455 		rn = n;
456 	}
457 #endif
458 
459 nocheck:
460 	do
461 		switch ((rv = read(fd, buf, n))) {
462 		case -1:
463 			if (errno == EINTR)
464 				continue;
465 			return -1;
466 		case 0:
467 			return rn - n;
468 		default:
469 			n -= rv;
470 			buf = CAST(char *, CCAST(void *, buf)) + rv;
471 			break;
472 		}
473 	while (n > 0);
474 	return rn;
475 }
476 
477 protected int
478 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
479     size_t nbytes)
480 {
481 	char buf[4096];
482 	ssize_t r;
483 	int tfd;
484 
485 #ifdef WIN32
486 	const char *t;
487 	buf[0] = '\0';
488 	if ((t = getenv("TEMP")) != NULL)
489 		(void)strlcpy(buf, t, sizeof(buf));
490 	else if ((t = getenv("TMP")) != NULL)
491 		(void)strlcpy(buf, t, sizeof(buf));
492 	else if ((t = getenv("TMPDIR")) != NULL)
493 		(void)strlcpy(buf, t, sizeof(buf));
494 	if (buf[0] != '\0')
495 		(void)strlcat(buf, "/", sizeof(buf));
496 	(void)strlcat(buf, "file.XXXXXX", sizeof(buf));
497 #else
498 	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
499 #endif
500 #ifndef HAVE_MKSTEMP
501 	{
502 		char *ptr = mktemp(buf);
503 		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
504 		r = errno;
505 		(void)unlink(ptr);
506 		errno = r;
507 	}
508 #else
509 	{
510 		int te;
511 		mode_t ou = umask(0);
512 		tfd = mkstemp(buf);
513 		(void)umask(ou);
514 		te = errno;
515 		(void)unlink(buf);
516 		errno = te;
517 	}
518 #endif
519 	if (tfd == -1) {
520 		file_error(ms, errno,
521 		    "cannot create temporary file for pipe copy");
522 		return -1;
523 	}
524 
525 	if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
526 		r = 1;
527 	else {
528 		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
529 			if (swrite(tfd, buf, CAST(size_t, r)) != r)
530 				break;
531 	}
532 
533 	switch (r) {
534 	case -1:
535 		file_error(ms, errno, "error copying from pipe to temp file");
536 		return -1;
537 	case 0:
538 		break;
539 	default:
540 		file_error(ms, errno, "error while writing to temp file");
541 		return -1;
542 	}
543 
544 	/*
545 	 * We duplicate the file descriptor, because fclose on a
546 	 * tmpfile will delete the file, but any open descriptors
547 	 * can still access the phantom inode.
548 	 */
549 	if ((fd = dup2(tfd, fd)) == -1) {
550 		file_error(ms, errno, "could not dup descriptor for temp file");
551 		return -1;
552 	}
553 	(void)close(tfd);
554 	if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
555 		file_badseek(ms);
556 		return -1;
557 	}
558 	return fd;
559 }
560 #if HAVE_FORK
561 #ifdef BUILTIN_DECOMPRESS
562 
563 #define FHCRC		(1 << 1)
564 #define FEXTRA		(1 << 2)
565 #define FNAME		(1 << 3)
566 #define FCOMMENT	(1 << 4)
567 
568 
569 private int
570 uncompressgzipped(const unsigned char *old, unsigned char **newch,
571     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
572 {
573 	unsigned char flg;
574 	size_t data_start = 10;
575 
576 	if (*n < 4) {
577 		goto err;
578 	}
579 
580 	flg = old[3];
581 
582 	if (flg & FEXTRA) {
583 		if (data_start + 1 >= *n)
584 			goto err;
585 		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
586 	}
587 	if (flg & FNAME) {
588 		while(data_start < *n && old[data_start])
589 			data_start++;
590 		data_start++;
591 	}
592 	if (flg & FCOMMENT) {
593 		while(data_start < *n && old[data_start])
594 			data_start++;
595 		data_start++;
596 	}
597 	if (flg & FHCRC)
598 		data_start += 2;
599 
600 	if (data_start >= *n)
601 		goto err;
602 
603 	*n -= data_start;
604 	old += data_start;
605 	return uncompresszlib(old, newch, bytes_max, n, 0);
606 err:
607 	return makeerror(newch, n, "File too short");
608 }
609 
610 private int
611 uncompresszlib(const unsigned char *old, unsigned char **newch,
612     size_t bytes_max, size_t *n, int zlib)
613 {
614 	int rc;
615 	z_stream z;
616 
617 	z.next_in = CCAST(Bytef *, old);
618 	z.avail_in = CAST(uint32_t, *n);
619 	z.next_out = *newch;
620 	z.avail_out = CAST(unsigned int, bytes_max);
621 	z.zalloc = Z_NULL;
622 	z.zfree = Z_NULL;
623 	z.opaque = Z_NULL;
624 
625 	/* LINTED bug in header macro */
626 	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
627 	if (rc != Z_OK)
628 		goto err;
629 
630 	rc = inflate(&z, Z_SYNC_FLUSH);
631 	if (rc != Z_OK && rc != Z_STREAM_END) {
632 		inflateEnd(&z);
633 		goto err;
634 	}
635 
636 	*n = CAST(size_t, z.total_out);
637 	rc = inflateEnd(&z);
638 	if (rc != Z_OK)
639 		goto err;
640 
641 	/* let's keep the nul-terminate tradition */
642 	(*newch)[*n] = '\0';
643 
644 	return OKDATA;
645 err:
646 	return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
647 }
648 #endif
649 
650 #ifdef BUILTIN_BZLIB
651 private int
652 uncompressbzlib(const unsigned char *old, unsigned char **newch,
653     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
654 {
655 	int rc;
656 	bz_stream bz;
657 
658 	memset(&bz, 0, sizeof(bz));
659 	rc = BZ2_bzDecompressInit(&bz, 0, 0);
660 	if (rc != BZ_OK)
661 		goto err;
662 
663 	bz.next_in = CCAST(char *, RCAST(const char *, old));
664 	bz.avail_in = CAST(uint32_t, *n);
665 	bz.next_out = RCAST(char *, *newch);
666 	bz.avail_out = CAST(unsigned int, bytes_max);
667 
668 	rc = BZ2_bzDecompress(&bz);
669 	if (rc != BZ_OK && rc != BZ_STREAM_END) {
670 		BZ2_bzDecompressEnd(&bz);
671 		goto err;
672 	}
673 
674 	/* Assume byte_max is within 32bit */
675 	/* assert(bz.total_out_hi32 == 0); */
676 	*n = CAST(size_t, bz.total_out_lo32);
677 	rc = BZ2_bzDecompressEnd(&bz);
678 	if (rc != BZ_OK)
679 		goto err;
680 
681 	/* let's keep the nul-terminate tradition */
682 	(*newch)[*n] = '\0';
683 
684 	return OKDATA;
685 err:
686 	return makeerror(newch, n, "bunzip error %d", rc);
687 }
688 #endif
689 
690 #ifdef BUILTIN_XZLIB
691 private int
692 uncompressxzlib(const unsigned char *old, unsigned char **newch,
693     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
694 {
695 	int rc;
696 	lzma_stream xz;
697 
698 	memset(&xz, 0, sizeof(xz));
699 	rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
700 	if (rc != LZMA_OK)
701 		goto err;
702 
703 	xz.next_in = CCAST(const uint8_t *, old);
704 	xz.avail_in = CAST(uint32_t, *n);
705 	xz.next_out = RCAST(uint8_t *, *newch);
706 	xz.avail_out = CAST(unsigned int, bytes_max);
707 
708 	rc = lzma_code(&xz, LZMA_RUN);
709 	if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
710 		lzma_end(&xz);
711 		goto err;
712 	}
713 
714 	*n = CAST(size_t, xz.total_out);
715 
716 	lzma_end(&xz);
717 
718 	/* let's keep the nul-terminate tradition */
719 	(*newch)[*n] = '\0';
720 
721 	return OKDATA;
722 err:
723 	return makeerror(newch, n, "unxz error %d", rc);
724 }
725 #endif
726 
727 #ifdef BUILTIN_ZSTDLIB
728 private int
729 uncompresszstd(const unsigned char *old, unsigned char **newch,
730     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
731 {
732 	size_t rc;
733 	ZSTD_DStream *zstd;
734 	ZSTD_inBuffer in;
735 	ZSTD_outBuffer out;
736 
737 	if ((zstd = ZSTD_createDStream()) == NULL) {
738 		return makeerror(newch, n, "No ZSTD decompression stream, %s",
739 		    strerror(errno));
740 	}
741 
742 	rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
743 	if (ZSTD_isError(rc))
744 		goto err;
745 
746 	in.src = CCAST(const void *, old);
747 	in.size = *n;
748 	in.pos = 0;
749 	out.dst = RCAST(void *, *newch);
750 	out.size = bytes_max;
751 	out.pos = 0;
752 
753 	rc = ZSTD_decompressStream(zstd, &out, &in);
754 	if (ZSTD_isError(rc))
755 		goto err;
756 
757 	*n = out.pos;
758 
759 	ZSTD_freeDStream(zstd);
760 
761 	/* let's keep the nul-terminate tradition */
762 	(*newch)[*n] = '\0';
763 
764 	return OKDATA;
765 err:
766 	ZSTD_freeDStream(zstd);
767 	return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
768 }
769 #endif
770 
771 #ifdef BUILTIN_LZLIB
772 private int
773 uncompresslzlib(const unsigned char *old, unsigned char **newch,
774     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
775 {
776 	enum LZ_Errno err;
777 	size_t old_remaining = *n;
778 	size_t new_remaining = bytes_max;
779 	size_t total_read = 0;
780 	unsigned char *bufp;
781 	struct LZ_Decoder *dec;
782 
783 	bufp = *newch;
784 
785 	dec = LZ_decompress_open();
786 	if (!dec) {
787 		return makeerror(newch, n, "unable to allocate LZ_Decoder");
788 	}
789 	if (LZ_decompress_errno(dec) != LZ_ok)
790 		goto err;
791 
792 	for (;;) {
793 		// LZ_decompress_read() stops at member boundaries, so we may
794 		// have more than one successful read after writing all data
795 		// we have.
796 		if (old_remaining > 0) {
797 			int wr = LZ_decompress_write(dec, old, old_remaining);
798 			if (wr < 0)
799 				goto err;
800 			old_remaining -= wr;
801 			old += wr;
802 		}
803 
804 		int rd = LZ_decompress_read(dec, bufp, new_remaining);
805 		if (rd > 0) {
806 			new_remaining -= rd;
807 			bufp += rd;
808 			total_read += rd;
809 		}
810 
811 		if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
812 			goto err;
813 		if (new_remaining == 0)
814 			break;
815 		if (old_remaining == 0 && rd == 0)
816 			break;
817 	}
818 
819 	LZ_decompress_close(dec);
820 	*n = total_read;
821 
822 	/* let's keep the nul-terminate tradition */
823 	*bufp = '\0';
824 
825 	return OKDATA;
826 err:
827 	err = LZ_decompress_errno(dec);
828 	LZ_decompress_close(dec);
829 	return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
830 }
831 #endif
832 
833 
834 static int
835 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
836 {
837 	char *msg;
838 	va_list ap;
839 	int rv;
840 
841 	free(*buf);
842 	va_start(ap, fmt);
843 	rv = vasprintf(&msg, fmt, ap);
844 	va_end(ap);
845 	if (rv < 0) {
846 		*buf = NULL;
847 		*len = 0;
848 		return NODATA;
849 	}
850 	*buf = RCAST(unsigned char *, msg);
851 	*len = strlen(msg);
852 	return ERRDATA;
853 }
854 
/*
 * Close descriptor slot i of the array fd and mark the slot as unused (-1).
 * A slot that already holds -1 is left alone, so the call is safe to
 * repeat.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
863 
/* Close both ends of the pipe pair fd[0]/fd[1] that are still open. */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
871 
/*
 * Arrange for descriptor fd to become descriptor i in the child.
 *
 * With posix_spawnp, v is the posix_spawn_file_actions_t to which a dup2
 * and a close of fd are appended.  Otherwise the dup2/close are done
 * immediately in the calling process, which exits if dup2 fails; v is not
 * needed in that case.  Nothing happens when fd already equals i.
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd != i) {
#ifdef HAVE_POSIX_SPAWNP
		posix_spawn_file_actions_t *actions =
		    RCAST(posix_spawn_file_actions_t *, v);
		posix_spawn_file_actions_adddup2(actions, fd, i);
		posix_spawn_file_actions_addclose(actions, fd);
#else
		if (dup2(fd, i) == -1) {
			DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
			exit(EXIT_FAILURE);
		}
		/* the conditional only exists to reference v */
		close(v ? fd : fd);
#endif
	}
	/* else: "no dup was necessary" */
}
889 
/*
 * Arrange for descriptor fd to be closed in the child.
 *
 * With posix_spawnp, a close action is appended to the file actions object
 * v; otherwise fd is closed right away in the calling process.
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *actions =
	    RCAST(posix_spawn_file_actions_t *, v);

	posix_spawn_file_actions_addclose(actions, fd);
#else
	/* the conditional only exists to reference v */
	close(v ? fd : fd);
#endif
}
900 
901 static void
902 handledesc(void *v, int fd, int fdp[3][2])
903 {
904 	if (fd != -1) {
905 		(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
906 		movedesc(v, STDIN_FILENO, fd);
907 	} else {
908 		movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
909 		if (fdp[STDIN_FILENO][1] > 2)
910 		    closedesc(v, fdp[STDIN_FILENO][1]);
911 	}
912 
913 	file_clear_closexec(STDIN_FILENO);
914 
915 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
916 	movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
917 	if (fdp[STDOUT_FILENO][0] > 2)
918 		closedesc(v, fdp[STDOUT_FILENO][0]);
919 
920 	file_clear_closexec(STDOUT_FILENO);
921 
922 	movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
923 	if (fdp[STDERR_FILENO][0] > 2)
924 		closedesc(v, fdp[STDERR_FILENO][0]);
925 
926 	file_clear_closexec(STDERR_FILENO);
927 }
928 
929 static pid_t
930 writechild(int fd, const void *old, size_t n)
931 {
932 	pid_t pid;
933 
934 	/*
935 	 * fork again, to avoid blocking because both
936 	 * pipes filled
937 	 */
938 	pid = fork();
939 	if (pid == -1) {
940 		DPRINTF("Fork failed (%s)\n", strerror(errno));
941 		return -1;
942 	}
943 	if (pid == 0) {
944 		/* child */
945 		if (swrite(fd, old, n) != CAST(ssize_t, n)) {
946 			DPRINTF("Write failed (%s)\n", strerror(errno));
947 			exit(EXIT_FAILURE);
948 		}
949 		exit(EXIT_SUCCESS);
950 	}
951 	/* parent */
952 	return pid;
953 }
954 
955 static ssize_t
956 filter_error(unsigned char *ubuf, ssize_t n)
957 {
958 	char *p;
959 	char *buf;
960 
961 	ubuf[n] = '\0';
962 	buf = RCAST(char *, ubuf);
963 	while (isspace(CAST(unsigned char, *buf)))
964 		buf++;
965 	DPRINTF("Filter error[[[%s]]]\n", buf);
966 	if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
967 		*p = '\0';
968 	if ((p = strchr(CAST(char *, buf), ';')) != NULL)
969 		*p = '\0';
970 	if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
971 		++p;
972 		while (isspace(CAST(unsigned char, *p)))
973 			p++;
974 		n = strlen(p);
975 		memmove(ubuf, p, CAST(size_t, n + 1));
976 	}
977 	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
978 	if (islower(*ubuf))
979 		*ubuf = toupper(*ubuf);
980 	return n;
981 }
982 
983 private const char *
984 methodname(size_t method)
985 {
986 	switch (method) {
987 #ifdef BUILTIN_DECOMPRESS
988 	case METH_FROZEN:
989 	case METH_ZLIB:
990 		return "zlib";
991 #endif
992 #ifdef BUILTIN_BZLIB
993 	case METH_BZIP:
994 		return "bzlib";
995 #endif
996 #ifdef BUILTIN_XZLIB
997 	case METH_XZ:
998 	case METH_LZMA:
999 		return "xzlib";
1000 #endif
1001 #ifdef BUILTIN_ZSTDLIB
1002 	case METH_ZSTD:
1003 		return "zstd";
1004 #endif
1005 #ifdef BUILTIN_LZLIB
1006 	case METH_LZIP:
1007 		return "lzlib";
1008 #endif
1009 	default:
1010 		return compr[method].argv[0];
1011 	}
1012 }
1013 
1014 private int (*
1015 getdecompressor(int method))(const unsigned char *, unsigned char **, size_t,
1016     size_t *, int)
1017 {
1018 	switch (method) {
1019 #ifdef BUILTIN_DECOMPRESS
1020 	case METH_FROZEN:
1021 		return uncompressgzipped;
1022 	case METH_ZLIB:
1023 		return uncompresszlib;
1024 #endif
1025 #ifdef BUILTIN_BZLIB
1026 	case METH_BZIP:
1027 		return uncompressbzlib;
1028 #endif
1029 #ifdef BUILTIN_XZLIB
1030 	case METH_XZ:
1031 	case METH_LZMA:
1032 		return uncompressxzlib;
1033 #endif
1034 #ifdef BUILTIN_ZSTDLIB
1035 	case METH_ZSTD:
1036 		return uncompresszstd;
1037 #endif
1038 #ifdef BUILTIN_LZLIB
1039 	case METH_LZIP:
1040 		return uncompresslzlib;
1041 #endif
1042 	default:
1043 		return NULL;
1044 	}
1045 }
1046 
1047 private int
1048 uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
1049     const unsigned char *old, unsigned char **newch, size_t* n)
1050 {
1051 	int fdp[3][2];
1052 	int status, rv, w;
1053 	pid_t pid;
1054 	pid_t writepid = -1;
1055 	size_t i;
1056 	ssize_t r;
1057 	char *const *args;
1058 #ifdef HAVE_POSIX_SPAWNP
1059 	posix_spawn_file_actions_t fa;
1060 #endif
1061 	int (*decompress)(const unsigned char *, unsigned char **,
1062 	    size_t, size_t *, int) = getdecompressor(method);
1063 
1064 	*newch = CAST(unsigned char *, malloc(bytes_max + 1));
1065 	if (*newch == NULL)
1066 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
1067 
1068 	if (decompress) {
1069 		if (nofork) {
1070 			return makeerror(newch, n,
1071 			    "Fork is required to uncompress, but disabled");
1072 		}
1073 		return (*decompress)(old, newch, bytes_max, n, 1);
1074 	}
1075 
1076 	(void)fflush(stdout);
1077 	(void)fflush(stderr);
1078 
1079 	for (i = 0; i < __arraycount(fdp); i++)
1080 		fdp[i][0] = fdp[i][1] = -1;
1081 
1082 	/*
1083 	 * There are multithreaded users who run magic_file()
1084 	 * from dozens of threads. If two parallel magic_file() calls
1085 	 * analyze two large compressed files, both will spawn
1086 	 * an uncompressing child here, which writes out uncompressed data.
1087 	 * We read some portion, then close the pipe, then waitpid() the child.
1088 	 * If uncompressed data is larger, child shound get EPIPE and exit.
1089 	 * However, with *parallel* calls OTHER child may unintentionally
1090 	 * inherit pipe fds, thus keeping pipe open and making writes in
1091 	 * our child block instead of failing with EPIPE!
1092 	 * (For the bug to occur, two threads must mutually inherit their pipes,
1093 	 * and both must have large outputs. Thus it happens not that often).
1094 	 * To avoid this, be sure to create pipes with O_CLOEXEC.
1095 	 */
1096 	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
1097 	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
1098 	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
1099 		closep(fdp[STDIN_FILENO]);
1100 		closep(fdp[STDOUT_FILENO]);
1101 		return makeerror(newch, n, "Cannot create pipe, %s",
1102 		    strerror(errno));
1103 	}
1104 
1105 	args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
1106 #ifdef HAVE_POSIX_SPAWNP
1107 	posix_spawn_file_actions_init(&fa);
1108 
1109 	handledesc(&fa, fd, fdp);
1110 
1111 	status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
1112 	    args, NULL);
1113 
1114 	posix_spawn_file_actions_destroy(&fa);
1115 
1116 	if (status == -1) {
1117 		return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
1118 		    compr[method].argv[0], strerror(errno));
1119 	}
1120 #else
1121 	/* For processes with large mapped virtual sizes, vfork
1122 	 * may be _much_ faster (10-100 times) than fork.
1123 	 */
1124 	pid = vfork();
1125 	if (pid == -1) {
1126 		return makeerror(newch, n, "Cannot vfork, %s",
1127 		    strerror(errno));
1128 	}
1129 	if (pid == 0) {
1130 		/* child */
1131 		/* Note: we are after vfork, do not modify memory
1132 		 * in a way which confuses parent. In particular,
1133 		 * do not modify fdp[i][j].
1134 		 */
1135 		handledesc(NULL, fd, fdp);
1136 
1137 		(void)execvp(compr[method].argv[0], args);
1138 		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
1139 		    compr[method].argv[0], strerror(errno));
1140 		_exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
1141 	}
1142 #endif
1143 	/* parent */
1144 	/* Close write sides of child stdout/err pipes */
1145 	for (i = 1; i < __arraycount(fdp); i++)
1146 		closefd(fdp[i], 1);
1147 	/* Write the buffer data to child stdin, if we don't have fd */
1148 	if (fd == -1) {
1149 		closefd(fdp[STDIN_FILENO], 0);
1150 		writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
1151 		if (writepid == (pid_t)-1) {
1152 			rv = makeerror(newch, n, "Write to child failed, %s",
1153 			    strerror(errno));
1154 			goto err;
1155 		}
1156 		closefd(fdp[STDIN_FILENO], 1);
1157 	}
1158 
1159 	rv = OKDATA;
1160 	r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
1161 	if (r < 0) {
1162 		rv = ERRDATA;
1163 		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
1164 		        strerror(errno));
1165 		goto err;
1166 	} else if ((r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
1167 	{
1168 		rv = ERRDATA;
1169 		r = filter_error(*newch, r);
1170 		goto ok;
1171 	}
1172 	if  (r == 0)
1173 		goto ok;
1174 	rv = makeerror(newch, n, "Read stderr failed, %s",
1175 	    strerror(errno));
1176 	goto err;
1177 ok:
1178 	*n = r;
1179 	/* NUL terminate, as every buffer is handled here. */
1180 	(*newch)[*n] = '\0';
1181 err:
1182 	closefd(fdp[STDIN_FILENO], 1);
1183 	closefd(fdp[STDOUT_FILENO], 0);
1184 	closefd(fdp[STDERR_FILENO], 0);
1185 
1186 	w = waitpid(pid, &status, 0);
1187 wait_err:
1188 	if (w == -1) {
1189 		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
1190 		DPRINTF("Child wait return %#x\n", status);
1191 	} else if (!WIFEXITED(status)) {
1192 		DPRINTF("Child not exited (%#x)\n", status);
1193 	} else if (WEXITSTATUS(status) != 0) {
1194 		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
1195 	}
1196 	if (writepid > 0) {
1197 		/* _After_ we know decompressor has exited, our input writer
1198 		 * definitely will exit now (at worst, writing fails in it,
1199 		 * since output fd is closed now on the reading size).
1200 		 */
1201 		w = waitpid(writepid, &status, 0);
1202 		writepid = -1;
1203 		goto wait_err;
1204 	}
1205 
1206 	closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
1207 	DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
1208 
1209 	return rv;
1210 }
1211 #endif
1212