/* $NetBSD: unzip.c,v 1.1 2009/06/25 20:27:05 joerg Exp $ */

/*-
 * Copyright (c) 2009 Joerg Sonnenberger <joerg@NetBSD.org>
 * Copyright (c) 2007-2008 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: revision 180124$
 *
 * This file would be much shorter if we didn't care about command-line
 * compatibility with Info-ZIP's UnZip, which requires us to duplicate
 * parts of libarchive in order to gain more detailed control of its
 * behaviour for the purpose of implementing the -n, -o, -L and -a
 * options.
37 */ 38 39 #include <sys/cdefs.h> 40 __RCSID("$NetBSD: unzip.c,v 1.1 2009/06/25 20:27:05 joerg Exp $"); 41 42 #include <sys/queue.h> 43 #include <sys/stat.h> 44 45 #include <ctype.h> 46 #include <errno.h> 47 #include <fcntl.h> 48 #include <fnmatch.h> 49 #include <stdarg.h> 50 #include <stdio.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <unistd.h> 54 55 #include <archive.h> 56 #include <archive_entry.h> 57 58 /* command-line options */ 59 static int a_opt; /* convert EOL */ 60 static const char *d_arg; /* directory */ 61 static int f_opt; /* update existing files only */ 62 static int j_opt; /* junk directories */ 63 static int L_opt; /* lowercase names */ 64 static int l_opt; /* list */ 65 static int n_opt; /* never overwrite */ 66 static int o_opt; /* always overwrite */ 67 static int q_opt; /* quiet */ 68 static int t_opt; /* test */ 69 static int u_opt; /* update */ 70 71 /* time when unzip started */ 72 static time_t now; 73 74 /* debug flag */ 75 static int unzip_debug; 76 77 /* running on tty? */ 78 static int tty; 79 80 /* error flag for -t */ 81 static int test_failed; 82 83 /* convenience macro */ 84 /* XXX should differentiate between ARCHIVE_{WARN,FAIL,RETRY} */ 85 #define ac(call) \ 86 do { \ 87 int acret = (call); \ 88 if (acret != ARCHIVE_OK) \ 89 errorx("%s", archive_error_string(a)); \ 90 } while (0) 91 92 /* 93 * Indicates that last info() did not end with EOL. This helps error() et 94 * al. avoid printing an error message on the same line as an incomplete 95 * informational message. 96 */ 97 static int noeol; 98 99 /* fatal error message + errno */ 100 static void 101 error(const char *fmt, ...) 
102 { 103 va_list ap; 104 105 if (noeol) 106 fprintf(stdout, "\n"); 107 fflush(stdout); 108 fprintf(stderr, "unzip: "); 109 va_start(ap, fmt); 110 vfprintf(stderr, fmt, ap); 111 va_end(ap); 112 fprintf(stderr, ": %s\n", strerror(errno)); 113 exit(1); 114 } 115 116 /* fatal error message, no errno */ 117 static void 118 errorx(const char *fmt, ...) 119 { 120 va_list ap; 121 122 if (noeol) 123 fprintf(stdout, "\n"); 124 fflush(stdout); 125 fprintf(stderr, "unzip: "); 126 va_start(ap, fmt); 127 vfprintf(stderr, fmt, ap); 128 va_end(ap); 129 fprintf(stderr, "\n"); 130 exit(1); 131 } 132 133 #if 0 134 /* non-fatal error message + errno */ 135 static void 136 warning(const char *fmt, ...) 137 { 138 va_list ap; 139 140 if (noeol) 141 fprintf(stdout, "\n"); 142 fflush(stdout); 143 fprintf(stderr, "unzip: "); 144 va_start(ap, fmt); 145 vfprintf(stderr, fmt, ap); 146 va_end(ap); 147 fprintf(stderr, ": %s\n", strerror(errno)); 148 } 149 #endif 150 151 /* non-fatal error message, no errno */ 152 static void 153 warningx(const char *fmt, ...) 154 { 155 va_list ap; 156 157 if (noeol) 158 fprintf(stdout, "\n"); 159 fflush(stdout); 160 fprintf(stderr, "unzip: "); 161 va_start(ap, fmt); 162 vfprintf(stderr, fmt, ap); 163 va_end(ap); 164 fprintf(stderr, "\n"); 165 } 166 167 /* informational message (if not -q) */ 168 static void 169 info(const char *fmt, ...) 170 { 171 va_list ap; 172 173 if (q_opt && !unzip_debug) 174 return; 175 va_start(ap, fmt); 176 vfprintf(stdout, fmt, ap); 177 va_end(ap); 178 fflush(stdout); 179 180 if (*fmt == '\0') 181 noeol = 1; 182 else 183 noeol = fmt[strlen(fmt) - 1] != '\n'; 184 } 185 186 /* debug message (if unzip_debug) */ 187 static void 188 debug(const char *fmt, ...) 
189 { 190 va_list ap; 191 192 if (!unzip_debug) 193 return; 194 va_start(ap, fmt); 195 vfprintf(stderr, fmt, ap); 196 va_end(ap); 197 fflush(stderr); 198 199 if (*fmt == '\0') 200 noeol = 1; 201 else 202 noeol = fmt[strlen(fmt) - 1] != '\n'; 203 } 204 205 /* duplicate a path name, possibly converting to lower case */ 206 static char * 207 pathdup(const char *path) 208 { 209 char *str; 210 size_t i, len; 211 212 len = strlen(path); 213 while (len && path[len - 1] == '/') 214 len--; 215 if ((str = malloc(len + 1)) == NULL) { 216 errno = ENOMEM; 217 error("malloc()"); 218 } 219 if (L_opt) { 220 for (i = 0; i < len; ++i) 221 str[i] = tolower((unsigned char)path[i]); 222 } else { 223 memcpy(str, path, len); 224 } 225 str[len] = '\0'; 226 227 return (str); 228 } 229 230 /* concatenate two path names */ 231 static char * 232 pathcat(const char *prefix, const char *path) 233 { 234 char *str; 235 size_t prelen, len; 236 237 prelen = prefix ? strlen(prefix) + 1 : 0; 238 len = strlen(path) + 1; 239 if ((str = malloc(prelen + len)) == NULL) { 240 errno = ENOMEM; 241 error("malloc()"); 242 } 243 if (prefix) { 244 memcpy(str, prefix, prelen); /* includes zero */ 245 str[prelen - 1] = '/'; /* splat zero */ 246 } 247 memcpy(str + prelen, path, len); /* includes zero */ 248 249 return (str); 250 } 251 252 /* 253 * Pattern lists for include / exclude processing 254 */ 255 struct pattern { 256 STAILQ_ENTRY(pattern) link; 257 char pattern[]; 258 }; 259 260 STAILQ_HEAD(pattern_list, pattern); 261 static struct pattern_list include = STAILQ_HEAD_INITIALIZER(include); 262 static struct pattern_list exclude = STAILQ_HEAD_INITIALIZER(exclude); 263 264 /* 265 * Add an entry to a pattern list 266 */ 267 static void 268 add_pattern(struct pattern_list *list, const char *pattern) 269 { 270 struct pattern *entry; 271 size_t len; 272 273 debug("adding pattern '%s'\n", pattern); 274 len = strlen(pattern); 275 if ((entry = malloc(sizeof *entry + len + 1)) == NULL) { 276 errno = ENOMEM; 277 
error("malloc()"); 278 } 279 memcpy(entry->pattern, pattern, len + 1); 280 STAILQ_INSERT_TAIL(list, entry, link); 281 } 282 283 /* 284 * Match a string against a list of patterns 285 */ 286 static int 287 match_pattern(struct pattern_list *list, const char *str) 288 { 289 struct pattern *entry; 290 291 STAILQ_FOREACH(entry, list, link) { 292 if (fnmatch(entry->pattern, str, 0) == 0) 293 return (1); 294 } 295 return (0); 296 } 297 298 /* 299 * Verify that a given pathname is in the include list and not in the 300 * exclude list. 301 */ 302 static int 303 accept_pathname(const char *pathname) 304 { 305 306 if (!STAILQ_EMPTY(&include) && !match_pattern(&include, pathname)) 307 return (0); 308 if (!STAILQ_EMPTY(&exclude) && match_pattern(&exclude, pathname)) 309 return (0); 310 return (1); 311 } 312 313 /* 314 * Create the specified directory with the specified mode, taking certain 315 * precautions on they way. 316 */ 317 static void 318 make_dir(const char *path, int mode) 319 { 320 struct stat sb; 321 322 if (lstat(path, &sb) == 0) { 323 if (S_ISDIR(sb.st_mode)) 324 return; 325 /* 326 * Normally, we should either ask the user about removing 327 * the non-directory of the same name as a directory we 328 * wish to create, or respect the -n or -o command-line 329 * options. However, this may lead to a later failure or 330 * even compromise (if this non-directory happens to be a 331 * symlink to somewhere unsafe), so we don't. 332 */ 333 334 /* 335 * Don't check unlink() result; failure will cause mkdir() 336 * to fail later, which we will catch. 337 */ 338 (void)unlink(path); 339 } 340 if (mkdir(path, mode) != 0 && errno != EEXIST) 341 error("mkdir('%s')", path); 342 } 343 344 /* 345 * Ensure that all directories leading up to (but not including) the 346 * specified path exist. 
347 * 348 * XXX inefficient + modifies the file in-place 349 */ 350 static void 351 make_parent(char *path) 352 { 353 struct stat sb; 354 char *sep; 355 356 sep = strrchr(path, '/'); 357 if (sep == NULL || sep == path) 358 return; 359 *sep = '\0'; 360 if (lstat(path, &sb) == 0) { 361 if (S_ISDIR(sb.st_mode)) { 362 *sep = '/'; 363 return; 364 } 365 unlink(path); 366 } 367 make_parent(path); 368 mkdir(path, 0755); 369 *sep = '/'; 370 371 #if 0 372 for (sep = path; (sep = strchr(sep, '/')) != NULL; sep++) { 373 /* root in case of absolute d_arg */ 374 if (sep == path) 375 continue; 376 *sep = '\0'; 377 make_dir(path, 0755); 378 *sep = '/'; 379 } 380 #endif 381 } 382 383 /* 384 * Extract a directory. 385 */ 386 static void 387 extract_dir(struct archive *a, struct archive_entry *e, const char *path) 388 { 389 int mode; 390 391 mode = archive_entry_filetype(e) & 0777; 392 if (mode == 0) 393 mode = 0755; 394 395 /* 396 * Some zipfiles contain directories with weird permissions such 397 * as 0644 or 0444. This can cause strange issues such as being 398 * unable to extract files into the directory we just created, or 399 * the user being unable to remove the directory later without 400 * first manually changing its permissions. Therefore, we whack 401 * the permissions into shape, assuming that the user wants full 402 * access and that anyone who gets read access also gets execute 403 * access. 404 */ 405 mode |= 0700; 406 if (mode & 0040) 407 mode |= 0010; 408 if (mode & 0004) 409 mode |= 0001; 410 411 info("d %s\n", path); 412 make_dir(path, mode); 413 ac(archive_read_data_skip(a)); 414 } 415 416 static unsigned char buffer[8192]; 417 static char spinner[] = { '|', '/', '-', '\\' }; 418 419 /* 420 * Extract a regular file. 
421 */ 422 static void 423 extract_file(struct archive *a, struct archive_entry *e, const char *path) 424 { 425 int mode; 426 time_t mtime; 427 struct stat sb; 428 struct timeval tv[2]; 429 int cr, fd, text, warn; 430 ssize_t len; 431 unsigned char *p, *q, *end; 432 433 mode = archive_entry_filetype(e) & 0777; 434 if (mode == 0) 435 mode = 0644; 436 mtime = archive_entry_mtime(e); 437 438 /* look for existing file of same name */ 439 if (lstat(path, &sb) == 0) { 440 if (u_opt || f_opt) { 441 /* check if up-to-date */ 442 if (S_ISREG(sb.st_mode) && sb.st_mtime >= mtime) 443 return; 444 (void)unlink(path); 445 } else if (o_opt) { 446 /* overwrite */ 447 (void)unlink(path); 448 } else if (n_opt) { 449 /* do not overwrite */ 450 return; 451 } else { 452 /* XXX ask user */ 453 errorx("not implemented"); 454 } 455 } else { 456 if (f_opt) 457 return; 458 } 459 460 if ((fd = open(path, O_RDWR|O_CREAT|O_TRUNC, mode)) < 0) 461 error("open('%s')", path); 462 463 /* loop over file contents and write to disk */ 464 info("x %s", path); 465 text = a_opt; 466 warn = 0; 467 cr = 0; 468 for (int n = 0; ; n++) { 469 if (tty && (n % 4) == 0) 470 info(" %c\b\b", spinner[(n / 4) % sizeof spinner]); 471 472 len = archive_read_data(a, buffer, sizeof buffer); 473 474 if (len < 0) 475 ac(len); 476 477 /* left over CR from previous buffer */ 478 if (a_opt && cr) { 479 if (len == 0 || buffer[0] != '\n') 480 if (write(fd, "\r", 1) != 1) 481 error("write('%s')", path); 482 cr = 0; 483 } 484 485 /* EOF */ 486 if (len == 0) 487 break; 488 end = buffer + len; 489 490 /* 491 * Detect whether this is a text file. The correct way to 492 * do this is to check the least significant bit of the 493 * "internal file attributes" field of the corresponding 494 * file header in the central directory, but libarchive 495 * does not read the central directory, so we have to 496 * guess by looking for non-ASCII characters in the 497 * buffer. Hopefully we won't guess wrong. 
If we do 498 * guess wrong, we print a warning message later. 499 */ 500 if (a_opt && n == 0) { 501 for (p = buffer; p < end; ++p) { 502 if (!isascii((unsigned char)*p)) { 503 text = 0; 504 break; 505 } 506 } 507 } 508 509 /* simple case */ 510 if (!a_opt || !text) { 511 if (write(fd, buffer, len) != len) 512 error("write('%s')", path); 513 continue; 514 } 515 516 /* hard case: convert \r\n to \n (sigh...) */ 517 for (p = buffer; p < end; p = q + 1) { 518 for (q = p; q < end; q++) { 519 if (!warn && !isascii(*q)) { 520 warningx("%s may be corrupted due" 521 " to weak text file detection" 522 " heuristic", path); 523 warn = 1; 524 } 525 if (q[0] != '\r') 526 continue; 527 if (&q[1] == end) { 528 cr = 1; 529 break; 530 } 531 if (q[1] == '\n') 532 break; 533 } 534 if (write(fd, p, q - p) != q - p) 535 error("write('%s')", path); 536 } 537 } 538 if (tty) 539 info(" \b\b"); 540 if (text) 541 info(" (text)"); 542 info("\n"); 543 544 /* set access and modification time */ 545 tv[0].tv_sec = now; 546 tv[0].tv_usec = 0; 547 tv[1].tv_sec = mtime; 548 tv[1].tv_usec = 0; 549 if (futimes(fd, tv) != 0) 550 error("utimes('%s')", path); 551 if (close(fd) != 0) 552 error("close('%s')", path); 553 } 554 555 /* 556 * Extract a zipfile entry: first perform some sanity checks to ensure 557 * that it is either a directory or a regular file and that the path is 558 * not absolute and does not try to break out of the current directory; 559 * then call either extract_dir() or extract_file() as appropriate. 560 * 561 * This is complicated a bit by the various ways in which we need to 562 * manipulate the path name. Case conversion (if requested by the -L 563 * option) happens first, but the include / exclude patterns are applied 564 * to the full converted path name, before the directory part of the path 565 * is removed in accordance with the -j option. 
Sanity checks are 566 * intentionally done earlier than they need to be, so the user will get a 567 * warning about insecure paths even for files or directories which 568 * wouldn't be extracted anyway. 569 */ 570 static void 571 extract(struct archive *a, struct archive_entry *e) 572 { 573 char *pathname, *realpathname; 574 mode_t filetype; 575 char *p, *q; 576 577 pathname = pathdup(archive_entry_pathname(e)); 578 filetype = archive_entry_filetype(e); 579 580 /* sanity checks */ 581 if (pathname[0] == '/' || 582 strncmp(pathname, "../", 3) == 0 || 583 strstr(pathname, "/../") != NULL) { 584 warningx("skipping insecure entry '%s'", pathname); 585 ac(archive_read_data_skip(a)); 586 free(pathname); 587 return; 588 } 589 590 /* I don't think this can happen in a zipfile.. */ 591 if (!S_ISDIR(filetype) && !S_ISREG(filetype)) { 592 warningx("skipping non-regular entry '%s'", pathname); 593 ac(archive_read_data_skip(a)); 594 free(pathname); 595 return; 596 } 597 598 /* skip directories in -j case */ 599 if (S_ISDIR(filetype) && j_opt) { 600 ac(archive_read_data_skip(a)); 601 free(pathname); 602 return; 603 } 604 605 /* apply include / exclude patterns */ 606 if (!accept_pathname(pathname)) { 607 ac(archive_read_data_skip(a)); 608 free(pathname); 609 return; 610 } 611 612 /* apply -j and -d */ 613 if (j_opt) { 614 for (p = q = pathname; *p; ++p) 615 if (*p == '/') 616 q = p + 1; 617 realpathname = pathcat(d_arg, q); 618 } else { 619 realpathname = pathcat(d_arg, pathname); 620 } 621 622 /* ensure that parent directory exists */ 623 make_parent(realpathname); 624 625 if (S_ISDIR(filetype)) 626 extract_dir(a, e, realpathname); 627 else 628 extract_file(a, e, realpathname); 629 630 free(realpathname); 631 free(pathname); 632 } 633 634 /* 635 * Print the name of an entry to stdout. 
636 */ 637 static void 638 list(struct archive *a, struct archive_entry *e) 639 { 640 641 printf("%s\n", archive_entry_pathname(e)); 642 ac(archive_read_data_skip(a)); 643 } 644 645 /* 646 * Extract to memory to check CRC 647 */ 648 static void 649 test(struct archive *a, struct archive_entry *e) 650 { 651 ssize_t len; 652 653 if (S_ISDIR(archive_entry_filetype(e))) 654 return; 655 656 info("%s ", archive_entry_pathname(e)); 657 while ((len = archive_read_data(a, buffer, sizeof buffer)) > 0) 658 /* nothing */; 659 if (len < 0) { 660 info("%s\n", archive_error_string(a)); 661 ++test_failed; 662 } else { 663 info("OK\n"); 664 } 665 666 /* shouldn't be necessary, but it doesn't hurt */ 667 ac(archive_read_data_skip(a)); 668 } 669 670 671 /* 672 * Main loop: open the zipfile, iterate over its contents and decide what 673 * to do with each entry. 674 */ 675 static void 676 unzip(const char *fn) 677 { 678 struct archive *a; 679 struct archive_entry *e; 680 int fd, ret; 681 682 if ((fd = open(fn, O_RDONLY)) < 0) 683 error("%s", fn); 684 685 a = archive_read_new(); 686 ac(archive_read_support_format_zip(a)); 687 ac(archive_read_open_fd(a, fd, 8192)); 688 689 for (;;) { 690 ret = archive_read_next_header(a, &e); 691 if (ret == ARCHIVE_EOF) 692 break; 693 ac(ret); 694 if (t_opt) 695 test(a, e); 696 else if (l_opt) 697 list(a, e); 698 else 699 extract(a, e); 700 } 701 702 ac(archive_read_close(a)); 703 (void)archive_read_finish(a); 704 if (close(fd) != 0) 705 error("%s", fn); 706 707 if (t_opt && test_failed) 708 errorx("%d checksum error(s) found.", test_failed); 709 } 710 711 static void 712 usage(void) 713 { 714 715 fprintf(stderr, "usage: unzip [-ajLlnoqtu] [-d dir] zipfile\n"); 716 exit(1); 717 } 718 719 static int 720 getopts(int argc, char *argv[]) 721 { 722 int opt; 723 724 optreset = optind = 1; 725 while ((opt = getopt(argc, argv, "ad:fjLlnoqtux:")) != -1) 726 switch (opt) { 727 case 'a': 728 a_opt = 1; 729 break; 730 case 'd': 731 d_arg = optarg; 732 break; 733 
case 'f': 734 f_opt = 1; 735 break; 736 case 'j': 737 j_opt = 1; 738 break; 739 case 'L': 740 L_opt = 1; 741 break; 742 case 'l': 743 l_opt = 1; 744 break; 745 case 'n': 746 n_opt = 1; 747 break; 748 case 'o': 749 o_opt = 1; 750 break; 751 case 'q': 752 q_opt = 1; 753 break; 754 case 't': 755 t_opt = 1; 756 break; 757 case 'u': 758 u_opt = 1; 759 break; 760 case 'x': 761 add_pattern(&exclude, optarg); 762 break; 763 default: 764 usage(); 765 } 766 767 return (optind); 768 } 769 770 int 771 main(int argc, char *argv[]) 772 { 773 const char *zipfile; 774 int nopts; 775 776 if (isatty(STDOUT_FILENO)) 777 tty = 1; 778 779 if (getenv("UNZIP_DEBUG") != NULL) 780 unzip_debug = 1; 781 for (int i = 0; i < argc; ++i) 782 debug("%s%c", argv[i], (i < argc - 1) ? ' ' : '\n'); 783 784 /* 785 * Info-ZIP's unzip(1) expects certain options to come before the 786 * zipfile name, and others to come after - though it does not 787 * enforce this. For simplicity, we accept *all* options both 788 * before and after the zipfile name. 789 */ 790 nopts = getopts(argc, argv); 791 792 if (argc <= nopts) 793 usage(); 794 zipfile = argv[nopts++]; 795 796 while (nopts < argc && *argv[nopts] != '-') 797 add_pattern(&include, argv[nopts++]); 798 799 nopts--; /* fake argv[0] */ 800 nopts += getopts(argc - nopts, argv + nopts); 801 802 if (n_opt + o_opt + u_opt > 1) 803 errorx("-n, -o and -u are contradictory"); 804 805 time(&now); 806 807 unzip(zipfile); 808 809 exit(0); 810 } 811