xref: /netbsd-src/external/bsd/file/dist/src/compress.c (revision ddb176824c39fb0db5ceef3e9e40dcaa273aec38)
1 /*	$NetBSD: compress.c,v 1.23 2023/08/18 19:00:11 christos Exp $	*/
2 
3 /*
4  * Copyright (c) Ian F. Darwin 1986-1995.
5  * Software written by Ian F. Darwin and others;
6  * maintained 1995-present by Christos Zoulas and others.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice immediately at the beginning of the file, without modification,
13  *    this list of conditions, and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
22  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 /*
31  * compress routines:
32  *	zmagic() - returns 0 if not recognized, uncompresses and prints
33  *		   information if recognized
34  *	uncompress(method, old, n, newch) - uncompress old into new,
35  *					    using method, return sizeof new
36  */
37 #include "file.h"
38 
39 #ifndef lint
40 #if 0
41 FILE_RCSID("@(#)$File: compress.c,v 1.157 2023/05/21 15:59:58 christos Exp $")
42 #else
43 __RCSID("$NetBSD: compress.c,v 1.23 2023/08/18 19:00:11 christos Exp $");
44 #endif
45 #endif
46 
47 #include "magic.h"
48 #include <stdlib.h>
49 #ifdef HAVE_UNISTD_H
50 #include <unistd.h>
51 #endif
52 #ifdef HAVE_SPAWN_H
53 #include <spawn.h>
54 #endif
55 #include <string.h>
56 #include <errno.h>
57 #include <ctype.h>
58 #include <stdarg.h>
59 #include <signal.h>
60 #ifndef HAVE_SIG_T
61 typedef void (*sig_t)(int);
62 #endif /* HAVE_SIG_T */
63 #ifdef HAVE_SYS_IOCTL_H
64 #include <sys/ioctl.h>
65 #endif
66 #ifdef HAVE_SYS_WAIT_H
67 #include <sys/wait.h>
68 #endif
69 #if defined(HAVE_SYS_TIME_H)
70 #include <sys/time.h>
71 #endif
72 
73 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
74 #define BUILTIN_DECOMPRESS
75 #include <zlib.h>
76 #endif
77 
78 #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
79 #define BUILTIN_BZLIB
80 #include <bzlib.h>
81 #endif
82 
83 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
84 #define BUILTIN_XZLIB
85 #include <lzma.h>
86 #endif
87 
88 #if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
89 #define BUILTIN_ZSTDLIB
90 #include <zstd.h>
91 #include <zstd_errors.h>
92 #endif
93 
94 #if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
95 #define BUILTIN_LZLIB
96 #include <lzlib.h>
97 #endif
98 
/*
 * Debug tracing.  With DEBUG defined, DPRINTF() formats its arguments to
 * /dev/tty, which is opened on first use and cached in `tty'; the process
 * aborts if the terminal cannot be opened.  Without DEBUG the macro expands
 * to nothing, so its arguments are never evaluated.
 */
#ifdef DEBUG
int tty = -1;
#define DPRINTF(...)	do { \
	if (tty == -1) \
		tty = open("/dev/tty", O_RDWR); \
	if (tty == -1) \
		abort(); \
	dprintf(tty, __VA_ARGS__); \
} while (/*CONSTCOND*/0)
#else
#define DPRINTF(...)
#endif
111 
112 #ifdef ZLIBSUPPORT
/*
 * The following python code is not really used because ZLIBSUPPORT is only
 * defined if we have a built-in zlib, and the built-in zlib handles that.
 * That is not true for android where we have zlib.h and not -lz.
 */
/* Script text handed to "python -c": stdin -> zlib.decompress -> stdout. */
static const char zlibcode[] =
    "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";

/* NULL-terminated argv for the external zlib fallback (used by compr[]). */
static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
122 
/*
 * Recognize a zlib stream header in the first two bytes of buf.
 *
 * The low nibble of buf[0] must be 8 and its top bit must be clear, and
 * the two bytes read as the big-endian value buf[0] * 256 + buf[1] must
 * be a multiple of 31.  The value is built arithmetically from the byte
 * values, so the result is the same on every host byte order.
 *
 * Returns 1 when the header matches, 0 otherwise.  The caller must supply
 * at least two readable bytes.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int header;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;

	/* First byte is the most significant one, independent of the host. */
	header = buf[0] * 256u + buf[1];
	if (header % 31)
		return 0;
	return 1;
}
139 #endif
140 
/*
 * Heuristic match for a raw lzma header: byte 0 must be 0x5d, bytes 1
 * and 2 must be zero, and byte 12 must be either 0x00 or 0xff.
 * Returns 1 on a match and 0 otherwise.  Reads up to buf[12], so the
 * caller must provide at least 13 bytes.
 */
static int
lzmacmp(const unsigned char *buf)
{
	/* Short-circuit keeps buf[12] untouched when the prefix mismatches. */
	return (buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0) &&
	    (buf[12] == 0 || buf[12] == 0xff);
}
150 
/*
 * NULL-terminated argument vectors for the external decompressors.
 * Element 0 is the program name; compr[] below points at these tables.
 */
#define gzip_flags "-cd"
#define lzip_flags gzip_flags

static const char *gzip_args[] = {
	"gzip", gzip_flags, NULL
};
static const char *uncompress_args[] = {
	"uncompress", "-c", NULL
};
static const char *bzip2_args[] = {
	"bzip2", "-cd", NULL
};
static const char *lzip_args[] = {
	"lzip", lzip_flags, NULL
};
static const char *xz_args[] = {
	"xz", "-cd", NULL
};
/* lrzip takes an explicit "-" operand in addition to its flags. */
static const char *lrzip_args[] = {
	"lrzip", "-qdf", "-", NULL
};
static const char *lz4_args[] = {
	"lz4", "-cd", NULL
};
static const char *zstd_args[] = {
	"zstd", "-cd", NULL
};
178 
/* Placeholders stored in the `unused' slot of the table below. */
#define	do_zlib		NULL
#define	do_bzlib	NULL

/*
 * Table of recognized compression formats, scanned in order by
 * file_zmagic().
 *
 *	u	 how to recognize the format: literal magic bytes when
 *		 maglen is positive, a predicate function when negative
 *	maglen	 number of magic bytes to compare; a negative value means
 *		 u.func is called and abs(maglen) is the minimum number of
 *		 input bytes required before trying the entry
 *	argv	 argument vector of the external decompressor
 *	unused	 not read anywhere in the visible code
 *
 * The METH_* constants are indices into this table and are used by
 * methodname() and getdecompressor(); they must match the entry order.
 * NOTE(review): METH_FROZEN is index 2, whose entry is labelled "gzipped"
 * (the "frozen" entry is index 3) - the name looks historical; confirm
 * before renaming.
 */
file_private const struct {
	union {
		const char *magic;
		int (*func)(const unsigned char *);
	} u;
	int maglen;
	const char **argv;
	void *unused;
} compr[] = {
#define METH_FROZEN	2
#define METH_BZIP	7
#define METH_XZ		9
#define METH_LZIP	8
#define METH_ZSTD	12
#define METH_LZMA	13
#define METH_ZLIB	14
    { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
    /* Uncompress can get stuck; so use gzip first if we have it
     * Idea from Damien Clark, thanks! */
    { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
    { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
    { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
    { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
    /* the standard pack utilities do not accept standard input */
    { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
    { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkziped */
    /* ...only first file examined */
    { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
    { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
    { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
    { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
    { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
    { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
    { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
#ifdef ZLIBSUPPORT
    { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
#endif
};
220 
/*
 * Result codes shared by uncompressbuf() and the uncompress*() helpers:
 *	OKDATA	the output buffer holds decompressed data
 *	NODATA	no output is available (makeerror() could not even format
 *		a message)
 *	ERRDATA	the output buffer holds an error message from makeerror()
 */
#define OKDATA 	0
#define NODATA	1
#define ERRDATA	2

file_private ssize_t swrite(int, const void *, size_t);
#if HAVE_FORK
/* Number of entries in compr[]; loop bound in file_zmagic(). */
file_private size_t ncompr = __arraycount(compr);
file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
    unsigned char **, size_t *);
/*
 * Built-in decompressors, one per optional library.  They share one
 * signature: (input, &output, bytes_max, &length, extra).
 */
#ifdef BUILTIN_DECOMPRESS
file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_BZLIB
file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_XZLIB
file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_ZSTDLIB
file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_LZLIB
file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif

/* printf-style: argument 3 is the format, checked against arguments 4+. */
static int makeerror(unsigned char **, size_t *, const char *, ...)
    __attribute__((__format__(__printf__, 3, 4)));
file_private const char *methodname(size_t);
256 
257 file_private int
format_decompression_error(struct magic_set * ms,size_t i,unsigned char * buf)258 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
259 {
260 	unsigned char *p;
261 	int mime = ms->flags & MAGIC_MIME;
262 
263 	if (!mime)
264 		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
265 
266 	for (p = buf; *p; p++)
267 		if (!isalnum(*p))
268 			*p = '-';
269 
270 	return file_printf(ms, "application/x-decompression-error-%s-%s",
271 	    methodname(i), buf);
272 }
273 
/*
 * Try every entry of compr[] against the buffer in b; for each one that
 * matches, decompress the data and run file_buffer() on the result.
 *
 * Does nothing (returns 0) unless MAGIC_COMPRESS is set in ms->flags.
 * MAGIC_COMPRESS is cleared while a decompressed buffer is examined and
 * is set again before returning.  SIGPIPE is ignored for the duration and
 * the previous disposition is restored on exit.
 *
 * Returns 1 if some entry produced output (data or an error message),
 * 0 if nothing was recognized, -1 on a printing/buffer error.
 */
file_protected int
file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
{
	unsigned char *newbuf = NULL;
	size_t i, nsz;
	char *rbuf;
	file_pushbuf_t *pb;
	int urv, prv, rv = 0;
	int mime = ms->flags & MAGIC_MIME;
	int fd = b->fd;
	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
	size_t nbytes = b->flen;
	int sa_saved = 0;
	struct sigaction sig_act;

	if ((ms->flags & MAGIC_COMPRESS) == 0)
		return 0;

	for (i = 0; i < ncompr; i++) {
		int zm;
		/* Not enough input for this entry's magic / predicate. */
		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
			continue;
		/* Negative maglen selects the predicate function. */
		if (compr[i].maglen < 0) {
			zm = (*compr[i].u.func)(buf);
		} else {
			zm = memcmp(buf, compr[i].u.magic,
			    CAST(size_t, compr[i].maglen)) == 0;
		}

		if (!zm)
			continue;

		/* Prevent SIGPIPE death if child dies unexpectedly */
		if (!sa_saved) {
			/*
			 * A separate new_act is passed in so that sig_act
			 * receives the previous disposition, which is
			 * restored at `out'.
			 */
			struct sigaction new_act;
			memset(&new_act, 0, sizeof(new_act));
			new_act.sa_handler = SIG_IGN;
			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
		}

		nsz = nbytes;
		/* Drop the buffer left over from a previous matching entry. */
		free(newbuf);
		urv = uncompressbuf(fd, ms->bytes_max, i,
		    (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
		    (char *)newbuf, nsz);
		switch (urv) {
		case OKDATA:
		case ERRDATA:
			/* Cleared so the nested file_buffer() does not recurse here. */
			ms->flags &= ~MAGIC_COMPRESS;
			if (urv == ERRDATA)
				prv = format_decompression_error(ms, i, newbuf);
			else
				prv = file_buffer(ms, -1, NULL, name, newbuf,
				    nsz);
			if (prv == -1)
				goto error;
			rv = 1;
			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
				goto out;
			/* Only one of the MIME bits set: print nothing more. */
			if (mime != MAGIC_MIME && mime != 0)
				goto out;
			if ((file_printf(ms,
			    mime ? " compressed-encoding=" : " (")) == -1)
				goto error;
			/* Describe the compressed container itself into a side buffer. */
			if ((pb = file_push_buffer(ms)) == NULL)
				goto error;
			/*
			 * XXX: If file_buffer fails here, we overwrite
			 * the compressed text. FIXME.
			 */
			if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1)
			{
				if (file_pop_buffer(ms, pb) != NULL)
					abort();
				goto error;
			}
			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
				if (file_printf(ms, "%s", rbuf) == -1) {
					free(rbuf);
					goto error;
				}
				free(rbuf);
			}
			if (!mime && file_printf(ms, ")") == -1)
				goto error;
			/*FALLTHROUGH*/
		case NODATA:
			break;
		default:
			abort();
			/*NOTREACHED*/
		error:
			/* Reached only via goto; the loop then tries the next entry. */
			rv = -1;
			break;
		}
	}
out:
	DPRINTF("rv = %d\n", rv);

	/* Restore SIGPIPE only if it was changed from something other than SIG_IGN. */
	if (sa_saved && sig_act.sa_handler != SIG_IGN)
		(void)sigaction(SIGPIPE, &sig_act, NULL);

	free(newbuf);
	ms->flags |= MAGIC_COMPRESS;
	DPRINTF("Zmagic returns %d\n", rv);
	return rv;
}
383 #endif
384 /*
385  * `safe' write for sockets and pipes.
386  */
387 file_private ssize_t
swrite(int fd,const void * buf,size_t n)388 swrite(int fd, const void *buf, size_t n)
389 {
390 	ssize_t rv;
391 	size_t rn = n;
392 
393 	do
394 		switch (rv = write(fd, buf, n)) {
395 		case -1:
396 			if (errno == EINTR)
397 				continue;
398 			return -1;
399 		default:
400 			n -= rv;
401 			buf = CAST(const char *, buf) + rv;
402 			break;
403 		}
404 	while (n > 0);
405 	return rn;
406 }
407 
408 
/*
 * `safe' read for sockets and pipes.
 *
 * Reads up to n bytes from fd into buf, retrying on EINTR and looping
 * until n bytes arrived or end of file is reached.  When FIONREAD is
 * available, fd is not standard input and canbepipe is set, the function
 * first waits (select with a 100ms timeout) for data to show up, and the
 * request is capped to the number of bytes FIONREAD reports as pending.
 *
 * Returns the number of bytes read (possibly short at EOF), 0 when
 * nothing became readable, or -1 on a read error.
 */
file_protected ssize_t
sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
{
	ssize_t rv;
#if defined(FIONREAD) && !defined(__MINGW32__)
	int t = 0;	/* bytes pending according to FIONREAD */
#endif
	size_t rn = n;	/* size of the (possibly capped) request */

	if (fd == STDIN_FILENO)
		goto nocheck;

#if defined(FIONREAD) && !defined(__MINGW32__)
	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
#ifdef FD_ZERO
		ssize_t cnt;
		for (cnt = 0;; cnt++) {
			fd_set check;
			struct timeval tout = {0, 100 * 1000};
			int selrv;

			FD_ZERO(&check);
			FD_SET(fd, &check);

			/*
			 * Avoid soft deadlock: do not read if there
			 * is nothing to read from sockets and pipes.
			 */
			selrv = select(fd + 1, &check, NULL, NULL, &tout);
			/*
			 * NOTE(review): a select() failure other than
			 * EINTR/EAGAIN neither breaks nor returns, so the
			 * loop retries; a timeout with cnt < 5 takes the
			 * final `else' and breaks out.  Confirm this is
			 * the intended retry policy.
			 */
			if (selrv == -1) {
				if (errno == EINTR || errno == EAGAIN)
					continue;
			} else if (selrv == 0 && cnt >= 5) {
				return 0;
			} else
				break;
		}
#endif
		/* Re-query now that data may have arrived. */
		(void)ioctl(fd, FIONREAD, &t);
	}

	/* Never ask for more than what is known to be pending. */
	if (t > 0 && CAST(size_t, t) < n) {
		n = t;
		rn = n;
	}
#endif

nocheck:
	do
		switch ((rv = read(fd, buf, n))) {
		case -1:
			/* `continue' re-tests n > 0, then retries the read */
			if (errno == EINTR)
				continue;
			return -1;
		case 0:
			/* EOF: report what was collected so far */
			return rn - n;
		default:
			n -= rv;
			buf = CAST(char *, CCAST(void *, buf)) + rv;
			break;
		}
	while (n > 0);
	return rn;
}
476 
477 file_protected int
file_pipe2file(struct magic_set * ms,int fd,const void * startbuf,size_t nbytes)478 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
479     size_t nbytes)
480 {
481 	char buf[4096];
482 	ssize_t r;
483 	int tfd;
484 
485 #ifdef WIN32
486 	const char *t;
487 	buf[0] = '\0';
488 	if ((t = getenv("TEMP")) != NULL)
489 		(void)strlcpy(buf, t, sizeof(buf));
490 	else if ((t = getenv("TMP")) != NULL)
491 		(void)strlcpy(buf, t, sizeof(buf));
492 	else if ((t = getenv("TMPDIR")) != NULL)
493 		(void)strlcpy(buf, t, sizeof(buf));
494 	if (buf[0] != '\0')
495 		(void)strlcat(buf, "/", sizeof(buf));
496 	(void)strlcat(buf, "file.XXXXXX", sizeof(buf));
497 #else
498 	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
499 #endif
500 #ifndef HAVE_MKSTEMP
501 	{
502 		char *ptr = mktemp(buf);
503 		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
504 		r = errno;
505 		(void)unlink(ptr);
506 		errno = r;
507 	}
508 #else
509 	{
510 		int te;
511 		mode_t ou = umask(0);
512 		tfd = mkstemp(buf);
513 		(void)umask(ou);
514 		te = errno;
515 		(void)unlink(buf);
516 		errno = te;
517 	}
518 #endif
519 	if (tfd == -1) {
520 		file_error(ms, errno,
521 		    "cannot create temporary file for pipe copy");
522 		return -1;
523 	}
524 
525 	if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
526 		r = 1;
527 	else {
528 		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
529 			if (swrite(tfd, buf, CAST(size_t, r)) != r)
530 				break;
531 	}
532 
533 	switch (r) {
534 	case -1:
535 		file_error(ms, errno, "error copying from pipe to temp file");
536 		return -1;
537 	case 0:
538 		break;
539 	default:
540 		file_error(ms, errno, "error while writing to temp file");
541 		return -1;
542 	}
543 
544 	/*
545 	 * We duplicate the file descriptor, because fclose on a
546 	 * tmpfile will delete the file, but any open descriptors
547 	 * can still access the phantom inode.
548 	 */
549 	if ((fd = dup2(tfd, fd)) == -1) {
550 		file_error(ms, errno, "could not dup descriptor for temp file");
551 		return -1;
552 	}
553 	(void)close(tfd);
554 	if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
555 		file_badseek(ms);
556 		return -1;
557 	}
558 	return fd;
559 }
560 #if HAVE_FORK
561 #ifdef BUILTIN_DECOMPRESS
562 
/*
 * Bits of the gzip header flag byte (header byte 3), tested by
 * uncompressgzipped() to skip the optional header fields.
 */
#define FHCRC		(1 << 1)
#define FEXTRA		(1 << 2)
#define FNAME		(1 << 3)
#define FCOMMENT	(1 << 4)
567 
568 
569 file_private int
uncompressgzipped(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)570 uncompressgzipped(const unsigned char *old, unsigned char **newch,
571     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
572 {
573 	unsigned char flg;
574 	size_t data_start = 10;
575 
576 	if (*n < 4) {
577 		goto err;
578 	}
579 
580 	flg = old[3];
581 
582 	if (flg & FEXTRA) {
583 		if (data_start + 1 >= *n)
584 			goto err;
585 		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
586 	}
587 	if (flg & FNAME) {
588 		while(data_start < *n && old[data_start])
589 			data_start++;
590 		data_start++;
591 	}
592 	if (flg & FCOMMENT) {
593 		while(data_start < *n && old[data_start])
594 			data_start++;
595 		data_start++;
596 	}
597 	if (flg & FHCRC)
598 		data_start += 2;
599 
600 	if (data_start >= *n)
601 		goto err;
602 
603 	*n -= data_start;
604 	old += data_start;
605 	return uncompresszlib(old, newch, bytes_max, n, 0);
606 err:
607 	return makeerror(newch, n, "File too short");
608 }
609 
610 file_private int
uncompresszlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int zlib)611 uncompresszlib(const unsigned char *old, unsigned char **newch,
612     size_t bytes_max, size_t *n, int zlib)
613 {
614 	int rc;
615 	z_stream z;
616 
617 	DPRINTF("builtin zlib decompression\n");
618 	z.next_in = CCAST(Bytef *, old);
619 	z.avail_in = CAST(uint32_t, *n);
620 	z.next_out = *newch;
621 	z.avail_out = CAST(unsigned int, bytes_max);
622 	z.zalloc = Z_NULL;
623 	z.zfree = Z_NULL;
624 	z.opaque = Z_NULL;
625 
626 	/* LINTED bug in header macro */
627 	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
628 	if (rc != Z_OK)
629 		goto err;
630 
631 	rc = inflate(&z, Z_SYNC_FLUSH);
632 	if (rc != Z_OK && rc != Z_STREAM_END) {
633 		inflateEnd(&z);
634 		goto err;
635 	}
636 
637 	*n = CAST(size_t, z.total_out);
638 	rc = inflateEnd(&z);
639 	if (rc != Z_OK)
640 		goto err;
641 
642 	/* let's keep the nul-terminate tradition */
643 	(*newch)[*n] = '\0';
644 
645 	return OKDATA;
646 err:
647 	return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
648 }
649 #endif
650 
651 #ifdef BUILTIN_BZLIB
652 file_private int
uncompressbzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)653 uncompressbzlib(const unsigned char *old, unsigned char **newch,
654     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
655 {
656 	int rc;
657 	bz_stream bz;
658 
659 	DPRINTF("builtin bzlib decompression\n");
660 	memset(&bz, 0, sizeof(bz));
661 	rc = BZ2_bzDecompressInit(&bz, 0, 0);
662 	if (rc != BZ_OK)
663 		goto err;
664 
665 	bz.next_in = CCAST(char *, RCAST(const char *, old));
666 	bz.avail_in = CAST(uint32_t, *n);
667 	bz.next_out = RCAST(char *, *newch);
668 	bz.avail_out = CAST(unsigned int, bytes_max);
669 
670 	rc = BZ2_bzDecompress(&bz);
671 	if (rc != BZ_OK && rc != BZ_STREAM_END) {
672 		BZ2_bzDecompressEnd(&bz);
673 		goto err;
674 	}
675 
676 	/* Assume byte_max is within 32bit */
677 	/* assert(bz.total_out_hi32 == 0); */
678 	*n = CAST(size_t, bz.total_out_lo32);
679 	rc = BZ2_bzDecompressEnd(&bz);
680 	if (rc != BZ_OK)
681 		goto err;
682 
683 	/* let's keep the nul-terminate tradition */
684 	(*newch)[*n] = '\0';
685 
686 	return OKDATA;
687 err:
688 	return makeerror(newch, n, "bunzip error %d", rc);
689 }
690 #endif
691 
692 #ifdef BUILTIN_XZLIB
693 file_private int
uncompressxzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)694 uncompressxzlib(const unsigned char *old, unsigned char **newch,
695     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
696 {
697 	int rc;
698 	lzma_stream xz;
699 
700 	DPRINTF("builtin xzlib decompression\n");
701 	memset(&xz, 0, sizeof(xz));
702 	rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
703 	if (rc != LZMA_OK)
704 		goto err;
705 
706 	xz.next_in = CCAST(const uint8_t *, old);
707 	xz.avail_in = CAST(uint32_t, *n);
708 	xz.next_out = RCAST(uint8_t *, *newch);
709 	xz.avail_out = CAST(unsigned int, bytes_max);
710 
711 	rc = lzma_code(&xz, LZMA_RUN);
712 	if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
713 		lzma_end(&xz);
714 		goto err;
715 	}
716 
717 	*n = CAST(size_t, xz.total_out);
718 
719 	lzma_end(&xz);
720 
721 	/* let's keep the nul-terminate tradition */
722 	(*newch)[*n] = '\0';
723 
724 	return OKDATA;
725 err:
726 	return makeerror(newch, n, "unxz error %d", rc);
727 }
728 #endif
729 
730 #ifdef BUILTIN_ZSTDLIB
731 file_private int
uncompresszstd(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)732 uncompresszstd(const unsigned char *old, unsigned char **newch,
733     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
734 {
735 	size_t rc;
736 	ZSTD_DStream *zstd;
737 	ZSTD_inBuffer in;
738 	ZSTD_outBuffer out;
739 
740 	DPRINTF("builtin zstd decompression\n");
741 	if ((zstd = ZSTD_createDStream()) == NULL) {
742 		return makeerror(newch, n, "No ZSTD decompression stream, %s",
743 		    strerror(errno));
744 	}
745 
746 	rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
747 	if (ZSTD_isError(rc))
748 		goto err;
749 
750 	in.src = CCAST(const void *, old);
751 	in.size = *n;
752 	in.pos = 0;
753 	out.dst = RCAST(void *, *newch);
754 	out.size = bytes_max;
755 	out.pos = 0;
756 
757 	rc = ZSTD_decompressStream(zstd, &out, &in);
758 	if (ZSTD_isError(rc))
759 		goto err;
760 
761 	*n = out.pos;
762 
763 	ZSTD_freeDStream(zstd);
764 
765 	/* let's keep the nul-terminate tradition */
766 	(*newch)[*n] = '\0';
767 
768 	return OKDATA;
769 err:
770 	ZSTD_freeDStream(zstd);
771 	return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
772 }
773 #endif
774 
775 #ifdef BUILTIN_LZLIB
/*
 * Decompress lzip data with the built-in lzlib decoder.
 *
 * Input is fed to the decoder and output is drained from it in
 * alternation until either bytes_max output bytes were produced or all
 * input was written and a read returned nothing.
 *
 *	old	   compressed input, *n bytes long
 *	newch	   caller-allocated output buffer; replaced by an error
 *		   string on failure
 *	bytes_max  number of output bytes that may be produced
 *	n	   in: input length, out: number of bytes produced
 *
 * Returns OKDATA with a NUL-terminated result, or the makeerror() result
 * when the decoder cannot be allocated or reports an error.
 */
file_private int
uncompresslzlib(const unsigned char *old, unsigned char **newch,
    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
{
	enum LZ_Errno err;
	size_t old_remaining = *n;		/* input bytes not yet fed */
	size_t new_remaining = bytes_max;	/* output space still free */
	size_t total_read = 0;			/* output bytes produced */
	unsigned char *bufp;			/* next free output position */
	struct LZ_Decoder *dec;

	bufp = *newch;

	DPRINTF("builtin lzlib decompression\n");
	dec = LZ_decompress_open();
	if (!dec) {
		return makeerror(newch, n, "unable to allocate LZ_Decoder");
	}
	if (LZ_decompress_errno(dec) != LZ_ok)
		goto err;

	for (;;) {
		// LZ_decompress_read() stops at member boundaries, so we may
		// have more than one successful read after writing all data
		// we have.
		if (old_remaining > 0) {
			int wr = LZ_decompress_write(dec, old, old_remaining);
			if (wr < 0)
				goto err;
			old_remaining -= wr;
			old += wr;
		}

		int rd = LZ_decompress_read(dec, bufp, new_remaining);
		if (rd > 0) {
			new_remaining -= rd;
			bufp += rd;
			total_read += rd;
		}

		if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
			goto err;
		/* output limit reached */
		if (new_remaining == 0)
			break;
		/* everything was fed and nothing more came out */
		if (old_remaining == 0 && rd == 0)
			break;
	}

	LZ_decompress_close(dec);
	*n = total_read;

	/* let's keep the nul-terminate tradition */
	*bufp = '\0';

	return OKDATA;
err:
	/* fetch the error code before the decoder is destroyed */
	err = LZ_decompress_errno(dec);
	LZ_decompress_close(dec);
	return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
}
836 #endif
837 
838 
839 static int
makeerror(unsigned char ** buf,size_t * len,const char * fmt,...)840 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
841 {
842 	char *msg;
843 	va_list ap;
844 	int rv;
845 
846 	DPRINTF("Makeerror %s\n", fmt);
847 	free(*buf);
848 	va_start(ap, fmt);
849 	rv = vasprintf(&msg, fmt, ap);
850 	va_end(ap);
851 	if (rv < 0) {
852 		DPRINTF("Makeerror failed");
853 		*buf = NULL;
854 		*len = 0;
855 		return NODATA;
856 	}
857 	*buf = RCAST(unsigned char *, msg);
858 	*len = strlen(msg);
859 	return ERRDATA;
860 }
861 
/*
 * Close descriptor fd[i] unless the slot already holds -1, then mark the
 * slot as closed by storing -1 in it.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
870 
/*
 * Close both ends of the pipe pair stored in fd[0] and fd[1].
 */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
878 
/*
 * Arrange for descriptor fd to become descriptor i, closing fd afterwards.
 * Nothing happens when the two are already the same.
 *
 * With posix_spawnp support, v is the file-actions object and the dup2
 * and close are only recorded in it.  Otherwise the operations are done
 * immediately and the process exits with EXIT_FAILURE if dup2() fails.
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd != i) {
#ifdef HAVE_POSIX_SPAWNP
		posix_spawn_file_actions_t *actions =
		    RCAST(posix_spawn_file_actions_t *, v);

		posix_spawn_file_actions_adddup2(actions, fd, i);
		posix_spawn_file_actions_addclose(actions, fd);
#else
		if (dup2(fd, i) == -1) {
			DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
			exit(EXIT_FAILURE);
		}
		/* v is not needed here; reference it to keep it "used" */
		(void)v;
		close(fd);
#endif
	}
	/* else: "no dup was necessary" */
}
896 
/*
 * Close descriptor fd for the child: recorded in the file-actions object
 * v when posix_spawnp is used, performed immediately otherwise.
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_addclose(
	    RCAST(posix_spawn_file_actions_t *, v), fd);
#else
	/* v is not needed here; reference it to keep it "used" */
	(void)v;
	close(fd);
#endif
}
907 
908 static void
handledesc(void * v,int fd,int fdp[3][2])909 handledesc(void *v, int fd, int fdp[3][2])
910 {
911 	if (fd != -1) {
912 		(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
913 		movedesc(v, STDIN_FILENO, fd);
914 	} else {
915 		movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
916 		if (fdp[STDIN_FILENO][1] > 2)
917 		    closedesc(v, fdp[STDIN_FILENO][1]);
918 	}
919 
920 	file_clear_closexec(STDIN_FILENO);
921 
922 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
923 	movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
924 	if (fdp[STDOUT_FILENO][0] > 2)
925 		closedesc(v, fdp[STDOUT_FILENO][0]);
926 
927 	file_clear_closexec(STDOUT_FILENO);
928 
929 	movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
930 	if (fdp[STDERR_FILENO][0] > 2)
931 		closedesc(v, fdp[STDERR_FILENO][0]);
932 
933 	file_clear_closexec(STDERR_FILENO);
934 }
935 
936 static pid_t
writechild(int fd,const void * old,size_t n)937 writechild(int fd, const void *old, size_t n)
938 {
939 	pid_t pid;
940 
941 	/*
942 	 * fork again, to avoid blocking because both
943 	 * pipes filled
944 	 */
945 	pid = fork();
946 	if (pid == -1) {
947 		DPRINTF("Fork failed (%s)\n", strerror(errno));
948 		return -1;
949 	}
950 	if (pid == 0) {
951 		/* child */
952 		if (swrite(fd, old, n) != CAST(ssize_t, n)) {
953 			DPRINTF("Write failed (%s)\n", strerror(errno));
954 			exit(EXIT_FAILURE);
955 		}
956 		exit(EXIT_SUCCESS);
957 	}
958 	/* parent */
959 	return pid;
960 }
961 
962 static ssize_t
filter_error(unsigned char * ubuf,ssize_t n)963 filter_error(unsigned char *ubuf, ssize_t n)
964 {
965 	char *p;
966 	char *buf;
967 
968 	ubuf[n] = '\0';
969 	buf = RCAST(char *, ubuf);
970 	while (isspace(CAST(unsigned char, *buf)))
971 		buf++;
972 	DPRINTF("Filter error[[[%s]]]\n", buf);
973 	if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
974 		*p = '\0';
975 	if ((p = strchr(CAST(char *, buf), ';')) != NULL)
976 		*p = '\0';
977 	if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
978 		++p;
979 		while (isspace(CAST(unsigned char, *p)))
980 			p++;
981 		n = strlen(p);
982 		memmove(ubuf, p, CAST(size_t, n + 1));
983 	}
984 	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
985 	if (islower(*ubuf))
986 		*ubuf = toupper(*ubuf);
987 	return n;
988 }
989 
990 file_private const char *
methodname(size_t method)991 methodname(size_t method)
992 {
993 	switch (method) {
994 #ifdef BUILTIN_DECOMPRESS
995 	case METH_FROZEN:
996 	case METH_ZLIB:
997 		return "zlib";
998 #endif
999 #ifdef BUILTIN_BZLIB
1000 	case METH_BZIP:
1001 		return "bzlib";
1002 #endif
1003 #ifdef BUILTIN_XZLIB
1004 	case METH_XZ:
1005 	case METH_LZMA:
1006 		return "xzlib";
1007 #endif
1008 #ifdef BUILTIN_ZSTDLIB
1009 	case METH_ZSTD:
1010 		return "zstd";
1011 #endif
1012 #ifdef BUILTIN_LZLIB
1013 	case METH_LZIP:
1014 		return "lzlib";
1015 #endif
1016 	default:
1017 		return compr[method].argv[0];
1018 	}
1019 }
1020 
1021 file_private int (*
getdecompressor(size_t method)1022 getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
1023     size_t *, int)
1024 {
1025 	switch (method) {
1026 #ifdef BUILTIN_DECOMPRESS
1027 	case METH_FROZEN:
1028 		return uncompressgzipped;
1029 	case METH_ZLIB:
1030 		return uncompresszlib;
1031 #endif
1032 #ifdef BUILTIN_BZLIB
1033 	case METH_BZIP:
1034 		return uncompressbzlib;
1035 #endif
1036 #ifdef BUILTIN_XZLIB
1037 	case METH_XZ:
1038 	case METH_LZMA:
1039 		return uncompressxzlib;
1040 #endif
1041 #ifdef BUILTIN_ZSTDLIB
1042 	case METH_ZSTD:
1043 		return uncompresszstd;
1044 #endif
1045 #ifdef BUILTIN_LZLIB
1046 	case METH_LZIP:
1047 		return uncompresslzlib;
1048 #endif
1049 	default:
1050 		return NULL;
1051 	}
1052 }
1053 
1054 file_private int
uncompressbuf(int fd,size_t bytes_max,size_t method,int nofork,const unsigned char * old,unsigned char ** newch,size_t * n)1055 uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
1056     const unsigned char *old, unsigned char **newch, size_t* n)
1057 {
1058 	int fdp[3][2];
1059 	int status, rv, w;
1060 	pid_t pid;
1061 	pid_t writepid = -1;
1062 	size_t i;
1063 	ssize_t r, re;
1064 	char *const *args;
1065 #ifdef HAVE_POSIX_SPAWNP
1066 	posix_spawn_file_actions_t fa;
1067 #endif
1068 	int (*decompress)(const unsigned char *, unsigned char **,
1069 	    size_t, size_t *, int) = getdecompressor(method);
1070 
1071 	*newch = CAST(unsigned char *, malloc(bytes_max + 1));
1072 	if (*newch == NULL)
1073 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
1074 
1075 	if (decompress) {
1076 		if (nofork) {
1077 			return makeerror(newch, n,
1078 			    "Fork is required to uncompress, but disabled");
1079 		}
1080 		return (*decompress)(old, newch, bytes_max, n, 1);
1081 	}
1082 
1083 	(void)fflush(stdout);
1084 	(void)fflush(stderr);
1085 
1086 	for (i = 0; i < __arraycount(fdp); i++)
1087 		fdp[i][0] = fdp[i][1] = -1;
1088 
1089 	/*
1090 	 * There are multithreaded users who run magic_file()
1091 	 * from dozens of threads. If two parallel magic_file() calls
1092 	 * analyze two large compressed files, both will spawn
1093 	 * an uncompressing child here, which writes out uncompressed data.
1094 	 * We read some portion, then close the pipe, then waitpid() the child.
1095 	 * If uncompressed data is larger, child should get EPIPE and exit.
1096 	 * However, with *parallel* calls OTHER child may unintentionally
1097 	 * inherit pipe fds, thus keeping pipe open and making writes in
1098 	 * our child block instead of failing with EPIPE!
1099 	 * (For the bug to occur, two threads must mutually inherit their pipes,
1100 	 * and both must have large outputs. Thus it happens not that often).
1101 	 * To avoid this, be sure to create pipes with O_CLOEXEC.
1102 	 */
1103 	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
1104 	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
1105 	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
1106 		closep(fdp[STDIN_FILENO]);
1107 		closep(fdp[STDOUT_FILENO]);
1108 		return makeerror(newch, n, "Cannot create pipe, %s",
1109 		    strerror(errno));
1110 	}
1111 
1112 	args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
1113 #ifdef HAVE_POSIX_SPAWNP
1114 	posix_spawn_file_actions_init(&fa);
1115 
1116 	handledesc(&fa, fd, fdp);
1117 
1118 	DPRINTF("Executing %s\n", compr[method].argv[0]);
1119 	status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
1120 	    args, NULL);
1121 
1122 	posix_spawn_file_actions_destroy(&fa);
1123 
1124 	if (status == -1) {
1125 		return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
1126 		    compr[method].argv[0], strerror(errno));
1127 	}
1128 #else
1129 	/* For processes with large mapped virtual sizes, vfork
1130 	 * may be _much_ faster (10-100 times) than fork.
1131 	 */
1132 	pid = vfork();
1133 	if (pid == -1) {
1134 		return makeerror(newch, n, "Cannot vfork, %s",
1135 		    strerror(errno));
1136 	}
1137 	if (pid == 0) {
1138 		/* child */
1139 		/* Note: we are after vfork, do not modify memory
1140 		 * in a way which confuses parent. In particular,
1141 		 * do not modify fdp[i][j].
1142 		 */
1143 		handledesc(NULL, fd, fdp);
1144 		DPRINTF("Executing %s\n", compr[method].argv[0]);
1145 
1146 		(void)execvp(compr[method].argv[0], args);
1147 		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
1148 		    compr[method].argv[0], strerror(errno));
1149 		_exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
1150 	}
1151 #endif
1152 	/* parent */
1153 	/* Close write sides of child stdout/err pipes */
1154 	for (i = 1; i < __arraycount(fdp); i++)
1155 		closefd(fdp[i], 1);
1156 	/* Write the buffer data to child stdin, if we don't have fd */
1157 	if (fd == -1) {
1158 		closefd(fdp[STDIN_FILENO], 0);
1159 		writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
1160 		if (writepid == (pid_t)-1) {
1161 			rv = makeerror(newch, n, "Write to child failed, %s",
1162 			    strerror(errno));
1163 			DPRINTF("Write to child failed\n");
1164 			goto err;
1165 		}
1166 		closefd(fdp[STDIN_FILENO], 1);
1167 	}
1168 
1169 	rv = OKDATA;
1170 	r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
1171 	DPRINTF("read got %zd\n", r);
1172 	if (r < 0) {
1173 		rv = ERRDATA;
1174 		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
1175 		        strerror(errno));
1176 		goto err;
1177 	}
1178 	if (CAST(size_t, r) == bytes_max) {
1179 		/*
1180 		 * close fd so that the child exits with sigpipe and ignore
1181 		 * errors, otherwise we risk the child blocking and never
1182 		 * exiting.
1183 		 */
1184 		DPRINTF("Closing stdout for bytes_max\n");
1185 		closefd(fdp[STDOUT_FILENO], 0);
1186 		goto ok;
1187 	}
1188 	if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
1189 		DPRINTF("Got stuff from stderr %s\n", *newch);
1190 		rv = ERRDATA;
1191 		r = filter_error(*newch, r);
1192 		goto ok;
1193 	}
1194 	if  (re == 0)
1195 		goto ok;
1196 	rv = makeerror(newch, n, "Read stderr failed, %s",
1197 	    strerror(errno));
1198 	goto err;
1199 ok:
1200 	*n = r;
1201 	/* NUL terminate, as every buffer is handled here. */
1202 	(*newch)[*n] = '\0';
1203 err:
1204 	closefd(fdp[STDIN_FILENO], 1);
1205 	closefd(fdp[STDOUT_FILENO], 0);
1206 	closefd(fdp[STDERR_FILENO], 0);
1207 
1208 	w = waitpid(pid, &status, 0);
1209 wait_err:
1210 	if (w == -1) {
1211 		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
1212 		DPRINTF("Child wait return %#x\n", status);
1213 	} else if (!WIFEXITED(status)) {
1214 		DPRINTF("Child not exited (%#x)\n", status);
1215 	} else if (WEXITSTATUS(status) != 0) {
1216 		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
1217 	}
1218 	if (writepid > 0) {
1219 		/* _After_ we know decompressor has exited, our input writer
1220 		 * definitely will exit now (at worst, writing fails in it,
1221 		 * since output fd is closed now on the reading size).
1222 		 */
1223 		w = waitpid(writepid, &status, 0);
1224 		writepid = -1;
1225 		goto wait_err;
1226 	}
1227 
1228 	closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
1229 	DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
1230 
1231 	return rv;
1232 }
1233 #endif
1234