1 /* $NetBSD: compress.c,v 1.23 2023/08/18 19:00:11 christos Exp $ */
2
3 /*
4 * Copyright (c) Ian F. Darwin 1986-1995.
5 * Software written by Ian F. Darwin and others;
6 * maintained 1995-present by Christos Zoulas and others.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice immediately at the beginning of the file, without modification,
13 * this list of conditions, and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if not recognized, otherwise uncompresses
 *			and prints information (1 on success, -1 on error)
 *	uncompressbuf(fd, bytes_max, method, nofork, old, newch, n) -
 *			uncompress old into *newch using method; returns
 *			OKDATA, NODATA or ERRDATA and stores the length in *n
 */
37 #include "file.h"
38
39 #ifndef lint
40 #if 0
41 FILE_RCSID("@(#)$File: compress.c,v 1.157 2023/05/21 15:59:58 christos Exp $")
42 #else
43 __RCSID("$NetBSD: compress.c,v 1.23 2023/08/18 19:00:11 christos Exp $");
44 #endif
45 #endif
46
47 #include "magic.h"
48 #include <stdlib.h>
49 #ifdef HAVE_UNISTD_H
50 #include <unistd.h>
51 #endif
52 #ifdef HAVE_SPAWN_H
53 #include <spawn.h>
54 #endif
55 #include <string.h>
56 #include <errno.h>
57 #include <ctype.h>
58 #include <stdarg.h>
59 #include <signal.h>
60 #ifndef HAVE_SIG_T
61 typedef void (*sig_t)(int);
62 #endif /* HAVE_SIG_T */
63 #ifdef HAVE_SYS_IOCTL_H
64 #include <sys/ioctl.h>
65 #endif
66 #ifdef HAVE_SYS_WAIT_H
67 #include <sys/wait.h>
68 #endif
69 #if defined(HAVE_SYS_TIME_H)
70 #include <sys/time.h>
71 #endif
72
73 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
74 #define BUILTIN_DECOMPRESS
75 #include <zlib.h>
76 #endif
77
78 #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
79 #define BUILTIN_BZLIB
80 #include <bzlib.h>
81 #endif
82
83 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
84 #define BUILTIN_XZLIB
85 #include <lzma.h>
86 #endif
87
88 #if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
89 #define BUILTIN_ZSTDLIB
90 #include <zstd.h>
91 #include <zstd_errors.h>
92 #endif
93
94 #if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
95 #define BUILTIN_LZLIB
96 #include <lzlib.h>
97 #endif
98
#ifdef DEBUG
/* Descriptor used for debug output; stays -1 until DPRINTF first opens it. */
int tty = -1;
/*
 * Debug trace helper: opens /dev/tty on first use, aborts if that fails,
 * then writes the printf-style message to it.  Without DEBUG the macro
 * expands to nothing.
 */
#define DPRINTF(...)	do { \
	if (tty == -1) \
		tty = open("/dev/tty", O_RDWR); \
	if (tty == -1) \
		abort(); \
	dprintf(tty, __VA_ARGS__); \
} while (/*CONSTCOND*/0)
#else
#define DPRINTF(...)
#endif
111
112 #ifdef ZLIBSUPPORT
/*
 * The following python code is not really used because ZLIBSUPPORT is only
 * defined if we have a built-in zlib, and the built-in zlib handles that.
 * That is not true for android where we have zlib.h and not -lz.
 */
/* Python one-liner that inflates a zlib stream read from stdin to stdout. */
static const char zlibcode[] =
    "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";

/* External command line that runs the one-liner above. */
static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
122
/*
 * Recognize the two-byte header of a zlib (RFC 1950) stream.
 *
 * buf must provide at least two readable bytes: CMF followed by FLG.
 * Returns 1 when the low nibble of CMF is 8, its top bit is clear and the
 * FCHECK rule holds, 0 otherwise.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int check;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;
	/*
	 * RFC 1950: CMF * 256 + FLG must be a multiple of 31.  This is
	 * arithmetic on two individual bytes, so it must not depend on the
	 * byte order of the host.
	 */
	check = ((unsigned int)buf[0] << 8) | buf[1];
	return check % 31 == 0;
}
139 #endif
140
/*
 * Heuristic match for the lzma table entry.  Accepts the buffer when
 * byte 0 is 0x5d, bytes 1 and 2 are zero, and byte 12 is either 0 or 0xff.
 * Byte 12 is only examined once the first three bytes have matched.
 * Returns 1 on a match, 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	int head_ok = buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0;

	if (!head_ok)
		return 0;
	return (buf[12] == 0 || buf[12] == 0xff) ? 1 : 0;
}
150
/* Flags passed to gzip; the lzip command line reuses the same flags. */
#define gzip_flags "-cd"
#define lzip_flags gzip_flags

/*
 * NULL-terminated argument vectors for the external decompressor commands.
 * Element 0 is the program name; the vectors are referenced from compr[].
 */
static const char *gzip_args[] = {
	"gzip", gzip_flags, NULL
};
static const char *uncompress_args[] = {
	"uncompress", "-c", NULL
};
static const char *bzip2_args[] = {
	"bzip2", "-cd", NULL
};
static const char *lzip_args[] = {
	"lzip", lzip_flags, NULL
};
static const char *xz_args[] = {
	"xz", "-cd", NULL
};
static const char *lrzip_args[] = {
	"lrzip", "-qdf", "-", NULL
};
static const char *lz4_args[] = {
	"lz4", "-cd", NULL
};
static const char *zstd_args[] = {
	"zstd", "-cd", NULL
};

/* Placeholder values stored in the `unused' member of some compr[] rows. */
#define	do_zlib	NULL
#define	do_bzlib	NULL
181
/*
 * Table of recognized compression formats.  Each row holds either a magic
 * byte string (maglen > 0, compared with memcmp) or a matcher function
 * (maglen < 0, |maglen| being the minimum number of bytes required), plus
 * the argument vector of the external command that decompresses it.
 * The METH_* constants are indices into this table.
 *
 * NOTE(review): METH_FROZEN is 2, which is the gzip row (the "frozen" row
 * is 3); methodname() and getdecompressor() use it for the built-in
 * zlib/gzip path, so the name looks like a misnomer -- confirm before
 * renaming.
 */
file_private const struct {
	union {
		const char *magic;
		int (*func)(const unsigned char *);
	} u;
	int maglen;
	const char **argv;
	void *unused;
} compr[] = {
#define METH_FROZEN	2
#define METH_BZIP	7
#define METH_XZ		9
#define METH_LZIP	8
#define METH_ZSTD	12
#define METH_LZMA	13
#define METH_ZLIB	14
    { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
    /* Uncompress can get stuck; so use gzip first if we have it
     * Idea from Damien Clark, thanks! */
    { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
    { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
    { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
    { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
    /* the standard pack utilities do not accept standard input */
    { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
    { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkzipped */
    /* ...only first file examined */
    { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
    { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
    { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
    { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
    { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
    { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
    { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
#ifdef ZLIBSUPPORT
    { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
#endif
};
220
/*
 * Result codes of uncompressbuf() and the built-in decompressors:
 * OKDATA  - the output buffer holds decompressed data,
 * NODATA  - no buffer was produced (makeerror() could not format a message),
 * ERRDATA - the output buffer holds an error message instead of data.
 */
#define OKDATA 	0
#define NODATA	1
#define ERRDATA	2
224
225 file_private ssize_t swrite(int, const void *, size_t);
226 #if HAVE_FORK
227 file_private size_t ncompr = __arraycount(compr);
228 file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
229 unsigned char **, size_t *);
230 #ifdef BUILTIN_DECOMPRESS
231 file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
232 size_t *, int);
233 file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
234 size_t *, int);
235 #endif
236 #ifdef BUILTIN_BZLIB
237 file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
238 size_t *, int);
239 #endif
240 #ifdef BUILTIN_XZLIB
241 file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
242 size_t *, int);
243 #endif
244 #ifdef BUILTIN_ZSTDLIB
245 file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
246 size_t *, int);
247 #endif
248 #ifdef BUILTIN_LZLIB
249 file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
250 size_t *, int);
251 #endif
252
253 static int makeerror(unsigned char **, size_t *, const char *, ...)
254 __attribute__((__format__(__printf__, 3, 4)));
255 file_private const char *methodname(size_t);
256
257 file_private int
format_decompression_error(struct magic_set * ms,size_t i,unsigned char * buf)258 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
259 {
260 unsigned char *p;
261 int mime = ms->flags & MAGIC_MIME;
262
263 if (!mime)
264 return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
265
266 for (p = buf; *p; p++)
267 if (!isalnum(*p))
268 *p = '-';
269
270 return file_printf(ms, "application/x-decompression-error-%s-%s",
271 methodname(i), buf);
272 }
273
/*
 * Try to identify the contents of a compressed buffer.
 *
 * Does nothing (returns 0) unless MAGIC_COMPRESS is set in ms->flags.
 * Otherwise every compr[] entry whose magic matches the start of the
 * buffer is handed to uncompressbuf(); the result (data or error text) is
 * described through file_buffer()/format_decompression_error(), followed
 * by a description of the compressed container itself.
 *
 * Returns 0 if nothing was recognized, 1 if a description was printed and
 * -1 on error.  MAGIC_COMPRESS is cleared while the inner data is examined
 * and set again before returning.
 */
file_protected int
file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
{
	unsigned char *newbuf = NULL;
	size_t i, nsz;
	char *rbuf;
	file_pushbuf_t *pb;
	int urv, prv, rv = 0;
	int mime = ms->flags & MAGIC_MIME;
	int fd = b->fd;
	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
	size_t nbytes = b->flen;
	int sa_saved = 0;
	struct sigaction sig_act;

	if ((ms->flags & MAGIC_COMPRESS) == 0)
		return 0;

	for (i = 0; i < ncompr; i++) {
		int zm;
		/* Not enough bytes to test this entry's magic. */
		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
			continue;
		/* Negative maglen: matcher function; otherwise byte compare. */
		if (compr[i].maglen < 0) {
			zm = (*compr[i].u.func)(buf);
		} else {
			zm = memcmp(buf, compr[i].u.magic,
			    CAST(size_t, compr[i].maglen)) == 0;
		}

		if (!zm)
			continue;

		/* Prevent SIGPIPE death if child dies unexpectedly */
		if (!sa_saved) {
			/*
			 * Install SIG_IGN through a separate new_act; the
			 * previous disposition is kept in sig_act so it can
			 * be restored before returning.
			 */
			struct sigaction new_act;
			memset(&new_act, 0, sizeof(new_act));
			new_act.sa_handler = SIG_IGN;
			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
		}

		nsz = nbytes;
		/* Drop the buffer of a previous iteration, if any. */
		free(newbuf);
		urv = uncompressbuf(fd, ms->bytes_max, i,
		    (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
		    (char *)newbuf, nsz);
		switch (urv) {
		case OKDATA:
		case ERRDATA:
			/* Do not recurse into nested compression. */
			ms->flags &= ~MAGIC_COMPRESS;
			if (urv == ERRDATA)
				prv = format_decompression_error(ms, i, newbuf);
			else
				prv = file_buffer(ms, -1, NULL, name, newbuf,
				    nsz);
			if (prv == -1)
				goto error;
			rv = 1;
			/* Transparent mode: report only the inner data. */
			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
				goto out;
			/* Only some (not all) MIME bits requested: stop here. */
			if (mime != MAGIC_MIME && mime != 0)
				goto out;
			if ((file_printf(ms,
			    mime ? " compressed-encoding=" : " (")) == -1)
				goto error;
			/* Describe the container into a pushed buffer. */
			if ((pb = file_push_buffer(ms)) == NULL)
				goto error;
			/*
			 * XXX: If file_buffer fails here, we overwrite
			 * the compressed text. FIXME.
			 */
			if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1)
			{
				if (file_pop_buffer(ms, pb) != NULL)
					abort();
				goto error;
			}
			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
				if (file_printf(ms, "%s", rbuf) == -1) {
					free(rbuf);
					goto error;
				}
				free(rbuf);
			}
			if (!mime && file_printf(ms, ")") == -1)
				goto error;
			/*FALLTHROUGH*/
		case NODATA:
			/* The loop goes on with the remaining table entries. */
			break;
		default:
			abort();
			/*NOTREACHED*/
		error:
			/* Reached only via goto; leaves the switch, not the loop. */
			rv = -1;
			break;
		}
	}
out:
	DPRINTF("rv = %d\n", rv);

	/* Restore the caller's SIGPIPE disposition if we changed it. */
	if (sa_saved && sig_act.sa_handler != SIG_IGN)
		(void)sigaction(SIGPIPE, &sig_act, NULL);

	free(newbuf);
	ms->flags |= MAGIC_COMPRESS;
	DPRINTF("Zmagic returns %d\n", rv);
	return rv;
}
383 #endif
384 /*
385 * `safe' write for sockets and pipes.
386 */
387 file_private ssize_t
swrite(int fd,const void * buf,size_t n)388 swrite(int fd, const void *buf, size_t n)
389 {
390 ssize_t rv;
391 size_t rn = n;
392
393 do
394 switch (rv = write(fd, buf, n)) {
395 case -1:
396 if (errno == EINTR)
397 continue;
398 return -1;
399 default:
400 n -= rv;
401 buf = CAST(const char *, buf) + rv;
402 break;
403 }
404 while (n > 0);
405 return rn;
406 }
407
408
409 /*
410 * `safe' read for sockets and pipes.
411 */
412 file_protected ssize_t
sread(int fd,void * buf,size_t n,int canbepipe)413 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
414 {
415 ssize_t rv;
416 #if defined(FIONREAD) && !defined(__MINGW32__)
417 int t = 0;
418 #endif
419 size_t rn = n;
420
421 if (fd == STDIN_FILENO)
422 goto nocheck;
423
424 #if defined(FIONREAD) && !defined(__MINGW32__)
425 if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
426 #ifdef FD_ZERO
427 ssize_t cnt;
428 for (cnt = 0;; cnt++) {
429 fd_set check;
430 struct timeval tout = {0, 100 * 1000};
431 int selrv;
432
433 FD_ZERO(&check);
434 FD_SET(fd, &check);
435
436 /*
437 * Avoid soft deadlock: do not read if there
438 * is nothing to read from sockets and pipes.
439 */
440 selrv = select(fd + 1, &check, NULL, NULL, &tout);
441 if (selrv == -1) {
442 if (errno == EINTR || errno == EAGAIN)
443 continue;
444 } else if (selrv == 0 && cnt >= 5) {
445 return 0;
446 } else
447 break;
448 }
449 #endif
450 (void)ioctl(fd, FIONREAD, &t);
451 }
452
453 if (t > 0 && CAST(size_t, t) < n) {
454 n = t;
455 rn = n;
456 }
457 #endif
458
459 nocheck:
460 do
461 switch ((rv = read(fd, buf, n))) {
462 case -1:
463 if (errno == EINTR)
464 continue;
465 return -1;
466 case 0:
467 return rn - n;
468 default:
469 n -= rv;
470 buf = CAST(char *, CCAST(void *, buf)) + rv;
471 break;
472 }
473 while (n > 0);
474 return rn;
475 }
476
477 file_protected int
file_pipe2file(struct magic_set * ms,int fd,const void * startbuf,size_t nbytes)478 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
479 size_t nbytes)
480 {
481 char buf[4096];
482 ssize_t r;
483 int tfd;
484
485 #ifdef WIN32
486 const char *t;
487 buf[0] = '\0';
488 if ((t = getenv("TEMP")) != NULL)
489 (void)strlcpy(buf, t, sizeof(buf));
490 else if ((t = getenv("TMP")) != NULL)
491 (void)strlcpy(buf, t, sizeof(buf));
492 else if ((t = getenv("TMPDIR")) != NULL)
493 (void)strlcpy(buf, t, sizeof(buf));
494 if (buf[0] != '\0')
495 (void)strlcat(buf, "/", sizeof(buf));
496 (void)strlcat(buf, "file.XXXXXX", sizeof(buf));
497 #else
498 (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
499 #endif
500 #ifndef HAVE_MKSTEMP
501 {
502 char *ptr = mktemp(buf);
503 tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
504 r = errno;
505 (void)unlink(ptr);
506 errno = r;
507 }
508 #else
509 {
510 int te;
511 mode_t ou = umask(0);
512 tfd = mkstemp(buf);
513 (void)umask(ou);
514 te = errno;
515 (void)unlink(buf);
516 errno = te;
517 }
518 #endif
519 if (tfd == -1) {
520 file_error(ms, errno,
521 "cannot create temporary file for pipe copy");
522 return -1;
523 }
524
525 if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
526 r = 1;
527 else {
528 while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
529 if (swrite(tfd, buf, CAST(size_t, r)) != r)
530 break;
531 }
532
533 switch (r) {
534 case -1:
535 file_error(ms, errno, "error copying from pipe to temp file");
536 return -1;
537 case 0:
538 break;
539 default:
540 file_error(ms, errno, "error while writing to temp file");
541 return -1;
542 }
543
544 /*
545 * We duplicate the file descriptor, because fclose on a
546 * tmpfile will delete the file, but any open descriptors
547 * can still access the phantom inode.
548 */
549 if ((fd = dup2(tfd, fd)) == -1) {
550 file_error(ms, errno, "could not dup descriptor for temp file");
551 return -1;
552 }
553 (void)close(tfd);
554 if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
555 file_badseek(ms);
556 return -1;
557 }
558 return fd;
559 }
560 #if HAVE_FORK
561 #ifdef BUILTIN_DECOMPRESS
562
/* Bits of the gzip header flag byte tested by uncompressgzipped(). */
#define FHCRC		(1 << 1)
#define FEXTRA		(1 << 2)
#define FNAME		(1 << 3)
#define FCOMMENT	(1 << 4)
567
568
569 file_private int
uncompressgzipped(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)570 uncompressgzipped(const unsigned char *old, unsigned char **newch,
571 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
572 {
573 unsigned char flg;
574 size_t data_start = 10;
575
576 if (*n < 4) {
577 goto err;
578 }
579
580 flg = old[3];
581
582 if (flg & FEXTRA) {
583 if (data_start + 1 >= *n)
584 goto err;
585 data_start += 2 + old[data_start] + old[data_start + 1] * 256;
586 }
587 if (flg & FNAME) {
588 while(data_start < *n && old[data_start])
589 data_start++;
590 data_start++;
591 }
592 if (flg & FCOMMENT) {
593 while(data_start < *n && old[data_start])
594 data_start++;
595 data_start++;
596 }
597 if (flg & FHCRC)
598 data_start += 2;
599
600 if (data_start >= *n)
601 goto err;
602
603 *n -= data_start;
604 old += data_start;
605 return uncompresszlib(old, newch, bytes_max, n, 0);
606 err:
607 return makeerror(newch, n, "File too short");
608 }
609
610 file_private int
uncompresszlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int zlib)611 uncompresszlib(const unsigned char *old, unsigned char **newch,
612 size_t bytes_max, size_t *n, int zlib)
613 {
614 int rc;
615 z_stream z;
616
617 DPRINTF("builtin zlib decompression\n");
618 z.next_in = CCAST(Bytef *, old);
619 z.avail_in = CAST(uint32_t, *n);
620 z.next_out = *newch;
621 z.avail_out = CAST(unsigned int, bytes_max);
622 z.zalloc = Z_NULL;
623 z.zfree = Z_NULL;
624 z.opaque = Z_NULL;
625
626 /* LINTED bug in header macro */
627 rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
628 if (rc != Z_OK)
629 goto err;
630
631 rc = inflate(&z, Z_SYNC_FLUSH);
632 if (rc != Z_OK && rc != Z_STREAM_END) {
633 inflateEnd(&z);
634 goto err;
635 }
636
637 *n = CAST(size_t, z.total_out);
638 rc = inflateEnd(&z);
639 if (rc != Z_OK)
640 goto err;
641
642 /* let's keep the nul-terminate tradition */
643 (*newch)[*n] = '\0';
644
645 return OKDATA;
646 err:
647 return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
648 }
649 #endif
650
651 #ifdef BUILTIN_BZLIB
652 file_private int
uncompressbzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)653 uncompressbzlib(const unsigned char *old, unsigned char **newch,
654 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
655 {
656 int rc;
657 bz_stream bz;
658
659 DPRINTF("builtin bzlib decompression\n");
660 memset(&bz, 0, sizeof(bz));
661 rc = BZ2_bzDecompressInit(&bz, 0, 0);
662 if (rc != BZ_OK)
663 goto err;
664
665 bz.next_in = CCAST(char *, RCAST(const char *, old));
666 bz.avail_in = CAST(uint32_t, *n);
667 bz.next_out = RCAST(char *, *newch);
668 bz.avail_out = CAST(unsigned int, bytes_max);
669
670 rc = BZ2_bzDecompress(&bz);
671 if (rc != BZ_OK && rc != BZ_STREAM_END) {
672 BZ2_bzDecompressEnd(&bz);
673 goto err;
674 }
675
676 /* Assume byte_max is within 32bit */
677 /* assert(bz.total_out_hi32 == 0); */
678 *n = CAST(size_t, bz.total_out_lo32);
679 rc = BZ2_bzDecompressEnd(&bz);
680 if (rc != BZ_OK)
681 goto err;
682
683 /* let's keep the nul-terminate tradition */
684 (*newch)[*n] = '\0';
685
686 return OKDATA;
687 err:
688 return makeerror(newch, n, "bunzip error %d", rc);
689 }
690 #endif
691
692 #ifdef BUILTIN_XZLIB
693 file_private int
uncompressxzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)694 uncompressxzlib(const unsigned char *old, unsigned char **newch,
695 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
696 {
697 int rc;
698 lzma_stream xz;
699
700 DPRINTF("builtin xzlib decompression\n");
701 memset(&xz, 0, sizeof(xz));
702 rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
703 if (rc != LZMA_OK)
704 goto err;
705
706 xz.next_in = CCAST(const uint8_t *, old);
707 xz.avail_in = CAST(uint32_t, *n);
708 xz.next_out = RCAST(uint8_t *, *newch);
709 xz.avail_out = CAST(unsigned int, bytes_max);
710
711 rc = lzma_code(&xz, LZMA_RUN);
712 if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
713 lzma_end(&xz);
714 goto err;
715 }
716
717 *n = CAST(size_t, xz.total_out);
718
719 lzma_end(&xz);
720
721 /* let's keep the nul-terminate tradition */
722 (*newch)[*n] = '\0';
723
724 return OKDATA;
725 err:
726 return makeerror(newch, n, "unxz error %d", rc);
727 }
728 #endif
729
730 #ifdef BUILTIN_ZSTDLIB
731 file_private int
uncompresszstd(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)732 uncompresszstd(const unsigned char *old, unsigned char **newch,
733 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
734 {
735 size_t rc;
736 ZSTD_DStream *zstd;
737 ZSTD_inBuffer in;
738 ZSTD_outBuffer out;
739
740 DPRINTF("builtin zstd decompression\n");
741 if ((zstd = ZSTD_createDStream()) == NULL) {
742 return makeerror(newch, n, "No ZSTD decompression stream, %s",
743 strerror(errno));
744 }
745
746 rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
747 if (ZSTD_isError(rc))
748 goto err;
749
750 in.src = CCAST(const void *, old);
751 in.size = *n;
752 in.pos = 0;
753 out.dst = RCAST(void *, *newch);
754 out.size = bytes_max;
755 out.pos = 0;
756
757 rc = ZSTD_decompressStream(zstd, &out, &in);
758 if (ZSTD_isError(rc))
759 goto err;
760
761 *n = out.pos;
762
763 ZSTD_freeDStream(zstd);
764
765 /* let's keep the nul-terminate tradition */
766 (*newch)[*n] = '\0';
767
768 return OKDATA;
769 err:
770 ZSTD_freeDStream(zstd);
771 return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
772 }
773 #endif
774
775 #ifdef BUILTIN_LZLIB
776 file_private int
uncompresslzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)777 uncompresslzlib(const unsigned char *old, unsigned char **newch,
778 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
779 {
780 enum LZ_Errno err;
781 size_t old_remaining = *n;
782 size_t new_remaining = bytes_max;
783 size_t total_read = 0;
784 unsigned char *bufp;
785 struct LZ_Decoder *dec;
786
787 bufp = *newch;
788
789 DPRINTF("builtin lzlib decompression\n");
790 dec = LZ_decompress_open();
791 if (!dec) {
792 return makeerror(newch, n, "unable to allocate LZ_Decoder");
793 }
794 if (LZ_decompress_errno(dec) != LZ_ok)
795 goto err;
796
797 for (;;) {
798 // LZ_decompress_read() stops at member boundaries, so we may
799 // have more than one successful read after writing all data
800 // we have.
801 if (old_remaining > 0) {
802 int wr = LZ_decompress_write(dec, old, old_remaining);
803 if (wr < 0)
804 goto err;
805 old_remaining -= wr;
806 old += wr;
807 }
808
809 int rd = LZ_decompress_read(dec, bufp, new_remaining);
810 if (rd > 0) {
811 new_remaining -= rd;
812 bufp += rd;
813 total_read += rd;
814 }
815
816 if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
817 goto err;
818 if (new_remaining == 0)
819 break;
820 if (old_remaining == 0 && rd == 0)
821 break;
822 }
823
824 LZ_decompress_close(dec);
825 *n = total_read;
826
827 /* let's keep the nul-terminate tradition */
828 *bufp = '\0';
829
830 return OKDATA;
831 err:
832 err = LZ_decompress_errno(dec);
833 LZ_decompress_close(dec);
834 return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
835 }
836 #endif
837
838
839 static int
makeerror(unsigned char ** buf,size_t * len,const char * fmt,...)840 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
841 {
842 char *msg;
843 va_list ap;
844 int rv;
845
846 DPRINTF("Makeerror %s\n", fmt);
847 free(*buf);
848 va_start(ap, fmt);
849 rv = vasprintf(&msg, fmt, ap);
850 va_end(ap);
851 if (rv < 0) {
852 DPRINTF("Makeerror failed");
853 *buf = NULL;
854 *len = 0;
855 return NODATA;
856 }
857 *buf = RCAST(unsigned char *, msg);
858 *len = strlen(msg);
859 return ERRDATA;
860 }
861
/*
 * Close descriptor slot i of the array fd and mark it as closed (-1).
 * A slot that already holds -1 is left untouched.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
870
/* Close both ends of the pipe pair fd (slots 0 and 1). */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
878
/*
 * Arrange for descriptor fd to become descriptor i in the child.
 *
 * With posix_spawnp support, v is the posix_spawn_file_actions_t to which
 * a dup2 and a close action are appended.  Otherwise the function runs in
 * the child itself and performs dup2()/close() directly; v is unused.
 * Nothing is done when fd already equals i.
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd == i)
		return; /* "no dup was necessary" */
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_adddup2(fa, fd, i);
	posix_spawn_file_actions_addclose(fa, fd);
#else
	if (dup2(fd, i) == -1) {
		DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
		/*
		 * This branch runs in the vfork() child created by
		 * uncompressbuf(): use _exit(), not exit(), so the
		 * parent's stdio buffers and atexit handlers are left alone.
		 */
		_exit(EXIT_FAILURE);
	}
	close(v ? fd : fd);
#endif
}
896
/*
 * Arrange for descriptor fd to be closed in the child: either queue a
 * close action on the posix_spawn file actions passed in v, or close it
 * directly when posix_spawnp is not in use (v is then unused).
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_addclose(
	    RCAST(posix_spawn_file_actions_t *, v), fd);
#else
	(void)v;
	close(fd);
#endif
}
907
908 static void
handledesc(void * v,int fd,int fdp[3][2])909 handledesc(void *v, int fd, int fdp[3][2])
910 {
911 if (fd != -1) {
912 (void) lseek(fd, CAST(off_t, 0), SEEK_SET);
913 movedesc(v, STDIN_FILENO, fd);
914 } else {
915 movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
916 if (fdp[STDIN_FILENO][1] > 2)
917 closedesc(v, fdp[STDIN_FILENO][1]);
918 }
919
920 file_clear_closexec(STDIN_FILENO);
921
922 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
923 movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
924 if (fdp[STDOUT_FILENO][0] > 2)
925 closedesc(v, fdp[STDOUT_FILENO][0]);
926
927 file_clear_closexec(STDOUT_FILENO);
928
929 movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
930 if (fdp[STDERR_FILENO][0] > 2)
931 closedesc(v, fdp[STDERR_FILENO][0]);
932
933 file_clear_closexec(STDERR_FILENO);
934 }
935
936 static pid_t
writechild(int fd,const void * old,size_t n)937 writechild(int fd, const void *old, size_t n)
938 {
939 pid_t pid;
940
941 /*
942 * fork again, to avoid blocking because both
943 * pipes filled
944 */
945 pid = fork();
946 if (pid == -1) {
947 DPRINTF("Fork failed (%s)\n", strerror(errno));
948 return -1;
949 }
950 if (pid == 0) {
951 /* child */
952 if (swrite(fd, old, n) != CAST(ssize_t, n)) {
953 DPRINTF("Write failed (%s)\n", strerror(errno));
954 exit(EXIT_FAILURE);
955 }
956 exit(EXIT_SUCCESS);
957 }
958 /* parent */
959 return pid;
960 }
961
962 static ssize_t
filter_error(unsigned char * ubuf,ssize_t n)963 filter_error(unsigned char *ubuf, ssize_t n)
964 {
965 char *p;
966 char *buf;
967
968 ubuf[n] = '\0';
969 buf = RCAST(char *, ubuf);
970 while (isspace(CAST(unsigned char, *buf)))
971 buf++;
972 DPRINTF("Filter error[[[%s]]]\n", buf);
973 if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
974 *p = '\0';
975 if ((p = strchr(CAST(char *, buf), ';')) != NULL)
976 *p = '\0';
977 if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
978 ++p;
979 while (isspace(CAST(unsigned char, *p)))
980 p++;
981 n = strlen(p);
982 memmove(ubuf, p, CAST(size_t, n + 1));
983 }
984 DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
985 if (islower(*ubuf))
986 *ubuf = toupper(*ubuf);
987 return n;
988 }
989
990 file_private const char *
methodname(size_t method)991 methodname(size_t method)
992 {
993 switch (method) {
994 #ifdef BUILTIN_DECOMPRESS
995 case METH_FROZEN:
996 case METH_ZLIB:
997 return "zlib";
998 #endif
999 #ifdef BUILTIN_BZLIB
1000 case METH_BZIP:
1001 return "bzlib";
1002 #endif
1003 #ifdef BUILTIN_XZLIB
1004 case METH_XZ:
1005 case METH_LZMA:
1006 return "xzlib";
1007 #endif
1008 #ifdef BUILTIN_ZSTDLIB
1009 case METH_ZSTD:
1010 return "zstd";
1011 #endif
1012 #ifdef BUILTIN_LZLIB
1013 case METH_LZIP:
1014 return "lzlib";
1015 #endif
1016 default:
1017 return compr[method].argv[0];
1018 }
1019 }
1020
1021 file_private int (*
getdecompressor(size_t method)1022 getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
1023 size_t *, int)
1024 {
1025 switch (method) {
1026 #ifdef BUILTIN_DECOMPRESS
1027 case METH_FROZEN:
1028 return uncompressgzipped;
1029 case METH_ZLIB:
1030 return uncompresszlib;
1031 #endif
1032 #ifdef BUILTIN_BZLIB
1033 case METH_BZIP:
1034 return uncompressbzlib;
1035 #endif
1036 #ifdef BUILTIN_XZLIB
1037 case METH_XZ:
1038 case METH_LZMA:
1039 return uncompressxzlib;
1040 #endif
1041 #ifdef BUILTIN_ZSTDLIB
1042 case METH_ZSTD:
1043 return uncompresszstd;
1044 #endif
1045 #ifdef BUILTIN_LZLIB
1046 case METH_LZIP:
1047 return uncompresslzlib;
1048 #endif
1049 default:
1050 return NULL;
1051 }
1052 }
1053
1054 file_private int
uncompressbuf(int fd,size_t bytes_max,size_t method,int nofork,const unsigned char * old,unsigned char ** newch,size_t * n)1055 uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
1056 const unsigned char *old, unsigned char **newch, size_t* n)
1057 {
1058 int fdp[3][2];
1059 int status, rv, w;
1060 pid_t pid;
1061 pid_t writepid = -1;
1062 size_t i;
1063 ssize_t r, re;
1064 char *const *args;
1065 #ifdef HAVE_POSIX_SPAWNP
1066 posix_spawn_file_actions_t fa;
1067 #endif
1068 int (*decompress)(const unsigned char *, unsigned char **,
1069 size_t, size_t *, int) = getdecompressor(method);
1070
1071 *newch = CAST(unsigned char *, malloc(bytes_max + 1));
1072 if (*newch == NULL)
1073 return makeerror(newch, n, "No buffer, %s", strerror(errno));
1074
1075 if (decompress) {
1076 if (nofork) {
1077 return makeerror(newch, n,
1078 "Fork is required to uncompress, but disabled");
1079 }
1080 return (*decompress)(old, newch, bytes_max, n, 1);
1081 }
1082
1083 (void)fflush(stdout);
1084 (void)fflush(stderr);
1085
1086 for (i = 0; i < __arraycount(fdp); i++)
1087 fdp[i][0] = fdp[i][1] = -1;
1088
1089 /*
1090 * There are multithreaded users who run magic_file()
1091 * from dozens of threads. If two parallel magic_file() calls
1092 * analyze two large compressed files, both will spawn
1093 * an uncompressing child here, which writes out uncompressed data.
1094 * We read some portion, then close the pipe, then waitpid() the child.
1095 * If uncompressed data is larger, child should get EPIPE and exit.
1096 * However, with *parallel* calls OTHER child may unintentionally
1097 * inherit pipe fds, thus keeping pipe open and making writes in
1098 * our child block instead of failing with EPIPE!
1099 * (For the bug to occur, two threads must mutually inherit their pipes,
1100 * and both must have large outputs. Thus it happens not that often).
1101 * To avoid this, be sure to create pipes with O_CLOEXEC.
1102 */
1103 if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
1104 file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
1105 file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
1106 closep(fdp[STDIN_FILENO]);
1107 closep(fdp[STDOUT_FILENO]);
1108 return makeerror(newch, n, "Cannot create pipe, %s",
1109 strerror(errno));
1110 }
1111
1112 args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
1113 #ifdef HAVE_POSIX_SPAWNP
1114 posix_spawn_file_actions_init(&fa);
1115
1116 handledesc(&fa, fd, fdp);
1117
1118 DPRINTF("Executing %s\n", compr[method].argv[0]);
1119 status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
1120 args, NULL);
1121
1122 posix_spawn_file_actions_destroy(&fa);
1123
1124 if (status == -1) {
1125 return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
1126 compr[method].argv[0], strerror(errno));
1127 }
1128 #else
1129 /* For processes with large mapped virtual sizes, vfork
1130 * may be _much_ faster (10-100 times) than fork.
1131 */
1132 pid = vfork();
1133 if (pid == -1) {
1134 return makeerror(newch, n, "Cannot vfork, %s",
1135 strerror(errno));
1136 }
1137 if (pid == 0) {
1138 /* child */
1139 /* Note: we are after vfork, do not modify memory
1140 * in a way which confuses parent. In particular,
1141 * do not modify fdp[i][j].
1142 */
1143 handledesc(NULL, fd, fdp);
1144 DPRINTF("Executing %s\n", compr[method].argv[0]);
1145
1146 (void)execvp(compr[method].argv[0], args);
1147 dprintf(STDERR_FILENO, "exec `%s' failed, %s",
1148 compr[method].argv[0], strerror(errno));
1149 _exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
1150 }
1151 #endif
1152 /* parent */
1153 /* Close write sides of child stdout/err pipes */
1154 for (i = 1; i < __arraycount(fdp); i++)
1155 closefd(fdp[i], 1);
1156 /* Write the buffer data to child stdin, if we don't have fd */
1157 if (fd == -1) {
1158 closefd(fdp[STDIN_FILENO], 0);
1159 writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
1160 if (writepid == (pid_t)-1) {
1161 rv = makeerror(newch, n, "Write to child failed, %s",
1162 strerror(errno));
1163 DPRINTF("Write to child failed\n");
1164 goto err;
1165 }
1166 closefd(fdp[STDIN_FILENO], 1);
1167 }
1168
1169 rv = OKDATA;
1170 r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
1171 DPRINTF("read got %zd\n", r);
1172 if (r < 0) {
1173 rv = ERRDATA;
1174 DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
1175 strerror(errno));
1176 goto err;
1177 }
1178 if (CAST(size_t, r) == bytes_max) {
1179 /*
1180 * close fd so that the child exits with sigpipe and ignore
1181 * errors, otherwise we risk the child blocking and never
1182 * exiting.
1183 */
1184 DPRINTF("Closing stdout for bytes_max\n");
1185 closefd(fdp[STDOUT_FILENO], 0);
1186 goto ok;
1187 }
1188 if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
1189 DPRINTF("Got stuff from stderr %s\n", *newch);
1190 rv = ERRDATA;
1191 r = filter_error(*newch, r);
1192 goto ok;
1193 }
1194 if (re == 0)
1195 goto ok;
1196 rv = makeerror(newch, n, "Read stderr failed, %s",
1197 strerror(errno));
1198 goto err;
1199 ok:
1200 *n = r;
1201 /* NUL terminate, as every buffer is handled here. */
1202 (*newch)[*n] = '\0';
1203 err:
1204 closefd(fdp[STDIN_FILENO], 1);
1205 closefd(fdp[STDOUT_FILENO], 0);
1206 closefd(fdp[STDERR_FILENO], 0);
1207
1208 w = waitpid(pid, &status, 0);
1209 wait_err:
1210 if (w == -1) {
1211 rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
1212 DPRINTF("Child wait return %#x\n", status);
1213 } else if (!WIFEXITED(status)) {
1214 DPRINTF("Child not exited (%#x)\n", status);
1215 } else if (WEXITSTATUS(status) != 0) {
1216 DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
1217 }
1218 if (writepid > 0) {
1219 /* _After_ we know decompressor has exited, our input writer
1220 * definitely will exit now (at worst, writing fails in it,
1221 * since output fd is closed now on the reading size).
1222 */
1223 w = waitpid(writepid, &status, 0);
1224 writepid = -1;
1225 goto wait_err;
1226 }
1227
1228 closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
1229 DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
1230
1231 return rv;
1232 }
1233 #endif
1234