xref: /plan9/sys/src/cmd/gzip/unzip.c (revision ab28ad2a4e02e779ba816bd4ffb9e98fb557bade)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <flate.h>
5 #include "zip.h"
6 
/*
 * tunable sizes
 */
enum
{
	BufSize	= 4096	/* size of the stack buffer copyout moves stored data through */
};
11 
12 static	int	cheader(Biobuf *bin, ZipHead *zh);
13 static	int	copyout(int ofd, Biobuf *bin, long len);
14 static	int	crcwrite(void *ofd, void *buf, int n);
15 static	int	findCDir(Biobuf *bin, char *file);
16 static	int	get1(Biobuf *b);
17 static	int	get2(Biobuf *b);
18 static	ulong	get4(Biobuf *b);
19 static	char	*getname(Biobuf *b, int len);
20 static	int	header(Biobuf *bin, ZipHead *zh);
21 static	long	msdos2time(int time, int date);
22 static	int	sunzip(Biobuf *bin);
23 static	int	sunztable(Biobuf *bin);
24 static	void	trailer(Biobuf *bin, ZipHead *zh);
25 static	int	unzip(Biobuf *bin, char *file);
26 static	int	unzipEntry(Biobuf *bin, ZipHead *czh);
27 static	int	unztable(Biobuf *bin, char *file);
28 static	int	wantFile(char *file);
29 
30 static	void	*emalloc(ulong);
31 static	void	error(char*, ...);
32 #pragma	varargck	argpos	error	1
33 
34 static	Biobuf	bin;
35 static	ulong	crc;
36 static	ulong	*crctab;
37 static	int	debug;
38 static	char	*delfile;
39 static	int	lower;
40 static	int	nwant;
41 static	ulong	rlen;
42 static	int	settimes;
43 static	int	stdout;
44 static	int	verbose;
45 static	char	**want;
46 static	int	wbad;
47 static	ulong	wlen;
48 static	jmp_buf	zjmp;
49 static	jmp_buf	seekjmp;
50 static	int	autodir;
51 
/*
 * print the command synopsis on standard error and exit with
 * status "usage".  the option letters must match the cases
 * handled by ARGBEGIN in main, which includes -a.
 */
static void
usage(void)
{
	fprint(2, "usage: unzip [-acistTvD] [-f zipfile] [file ...]\n");
	exits("usage");
}
58 
59 void
main(int argc,char * argv[])60 main(int argc, char *argv[])
61 {
62 	char *zfile;
63 	int fd, ok, table, stream;
64 
65 	table = 0;
66 	stream = 0;
67 	zfile = nil;
68 	ARGBEGIN{
69 	case 'a':
70 		autodir++;
71 		break;
72 	case 'D':
73 		debug++;
74 		break;
75 	case 'c':
76 		stdout++;
77 		break;
78 	case 'i':
79 		lower++;
80 		break;
81 	case 'f':
82 		zfile = ARGF();
83 		if(zfile == nil)
84 			usage();
85 		break;
86 	case 's':
87 		stream++;
88 		break;
89 	case 't':
90 		table++;
91 		break;
92 	case 'T':
93 		settimes++;
94 		break;
95 	case 'v':
96 		verbose++;
97 		break;
98 	default:
99 		usage();
100 		break;
101 	}ARGEND
102 
103 	nwant = argc;
104 	want = argv;
105 
106 	crctab = mkcrctab(ZCrcPoly);
107 	ok = inflateinit();
108 	if(ok != FlateOk)
109 		sysfatal("inflateinit failed: %s", flateerr(ok));
110 
111 	if(zfile == nil){
112 		Binit(&bin, 0, OREAD);
113 		zfile = "<stdin>";
114 	}else{
115 		fd = open(zfile, OREAD);
116 		if(fd < 0)
117 			sysfatal("can't open %s: %r", zfile);
118 		Binit(&bin, fd, OREAD);
119 	}
120 
121 	if(setjmp(seekjmp)){
122 		fprint(2, "trying to re-run assuming -s\n");
123 		stream = 1;
124 		Bseek(&bin, 0, 0);
125 	}
126 
127 	if(table){
128 		if(stream)
129 			ok = sunztable(&bin);
130 		else
131 			ok = unztable(&bin, zfile);
132 	}else{
133 		if(stream)
134 			ok = sunzip(&bin);
135 		else
136 			ok = unzip(&bin, zfile);
137 	}
138 
139 	exits(ok ? nil: "errors");
140 }
141 
142 /*
143  * print the table of contents from the "central directory structure"
144  */
145 static int
unztable(Biobuf * bin,char * file)146 unztable(Biobuf *bin, char *file)
147 {
148 	ZipHead zh;
149 	int entries;
150 
151 	entries = findCDir(bin, file);
152 	if(entries < 0)
153 		return 0;
154 
155 	if(verbose > 1)
156 		print("%d items in the archive\n", entries);
157 	while(entries-- > 0){
158 		if(setjmp(zjmp)){
159 			free(zh.file);
160 			return 0;
161 		}
162 
163 		memset(&zh, 0, sizeof(zh));
164 		if(!cheader(bin, &zh))
165 			return 1;
166 
167 		if(wantFile(zh.file)){
168 			if(verbose)
169 				print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
170 			else
171 				print("%s\n", zh.file);
172 
173 			if(verbose > 1){
174 				print("\tmade by os %d vers %d.%d\n", zh.madeos, zh.madevers/10, zh.madevers % 10);
175 				print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers/10, zh.extvers % 10);
176 				print("\tflags %x\n", zh.flags);
177 				print("\tmethod %d\n", zh.meth);
178 				print("\tmod time %d\n", zh.modtime);
179 				print("\tmod date %d\n", zh.moddate);
180 				print("\tcrc %lux\n", zh.crc);
181 				print("\tcompressed size %lud\n", zh.csize);
182 				print("\tuncompressed size %lud\n", zh.uncsize);
183 				print("\tinternal attributes %ux\n", zh.iattr);
184 				print("\texternal attributes %lux\n", zh.eattr);
185 				print("\tstarts at %ld\n", zh.off);
186 			}
187 		}
188 
189 		free(zh.file);
190 		zh.file = nil;
191 	}
192 
193 	return 1;
194 }
195 
196 /*
197  * print the "local file header" table of contents
198  */
199 static int
sunztable(Biobuf * bin)200 sunztable(Biobuf *bin)
201 {
202 	ZipHead zh;
203 	vlong off;
204 	ulong hcrc, hcsize, huncsize;
205 	int ok, err;
206 
207 	ok = 1;
208 	for(;;){
209 		if(setjmp(zjmp)){
210 			free(zh.file);
211 			return 0;
212 		}
213 
214 		memset(&zh, 0, sizeof(zh));
215 		if(!header(bin, &zh))
216 			return ok;
217 
218 		hcrc = zh.crc;
219 		hcsize = zh.csize;
220 		huncsize = zh.uncsize;
221 
222 		wlen = 0;
223 		rlen = 0;
224 		crc = 0;
225 		wbad = 0;
226 
227 		if(zh.meth == 0){
228 			if(!copyout(-1, bin, zh.csize))
229 				error("reading data for %s failed: %r", zh.file);
230 		}else if(zh.meth == 8){
231 			off = Boffset(bin);
232 			err = inflate((void*)-1, crcwrite, bin, (int(*)(void*))Bgetc);
233 			if(err != FlateOk)
234 				error("inflate %s failed: %s", zh.file, flateerr(err));
235 			rlen = Boffset(bin) - off;
236 		}else
237 			error("can't handle compression method %d for %s", zh.meth, zh.file);
238 
239 		trailer(bin, &zh);
240 
241 		if(wantFile(zh.file)){
242 			if(verbose)
243 				print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
244 			else
245 				print("%s\n", zh.file);
246 
247 			if(verbose > 1){
248 				print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers / 10, zh.extvers % 10);
249 				print("\tflags %x\n", zh.flags);
250 				print("\tmethod %d\n", zh.meth);
251 				print("\tmod time %d\n", zh.modtime);
252 				print("\tmod date %d\n", zh.moddate);
253 				print("\tcrc %lux\n", zh.crc);
254 				print("\tcompressed size %lud\n", zh.csize);
255 				print("\tuncompressed size %lud\n", zh.uncsize);
256 				if((zh.flags & ZTrailInfo) && (hcrc || hcsize || huncsize)){
257 					print("\theader crc %lux\n", zh.crc);
258 					print("\theader compressed size %lud\n", zh.csize);
259 					print("\theader uncompressed size %lud\n", zh.uncsize);
260 				}
261 			}
262 		}
263 
264 		if(zh.crc != crc)
265 			error("crc mismatch for %s", zh.file);
266 		if(zh.uncsize != wlen)
267 			error("output size mismatch for %s", zh.file);
268 		if(zh.csize != rlen)
269 			error("input size mismatch for %s", zh.file);
270 
271 
272 		free(zh.file);
273 		zh.file = nil;
274 	}
275 }
276 
277 /*
278  * extract files using the info in the central directory structure
279  */
280 static int
unzip(Biobuf * bin,char * file)281 unzip(Biobuf *bin, char *file)
282 {
283 	ZipHead zh;
284 	vlong off;
285 	int ok, eok, entries;
286 
287 	entries = findCDir(bin, file);
288 	if(entries < 0)
289 		return 0;
290 
291 	ok = 1;
292 	while(entries-- > 0){
293 		if(setjmp(zjmp)){
294 			free(zh.file);
295 			return 0;
296 		}
297 		memset(&zh, 0, sizeof(zh));
298 		if(!cheader(bin, &zh))
299 			return ok;
300 
301 
302 		off = Boffset(bin);
303 		if(wantFile(zh.file)){
304 			if(Bseek(bin, zh.off, 0) < 0){
305 				fprint(2, "unzip: can't seek to start of %s, skipping\n", zh.file);
306 				ok = 0;
307 			}else{
308 				eok = unzipEntry(bin, &zh);
309 				if(eok <= 0){
310 					fprint(2, "unzip: skipping %s\n", zh.file);
311 					ok = 0;
312 				}
313 			}
314 		}
315 
316 		free(zh.file);
317 		zh.file = nil;
318 
319 		if(Bseek(bin, off, 0) < 0){
320 			fprint(2, "unzip: can't seek to start of next entry, terminating extraction\n");
321 			return 0;
322 		}
323 	}
324 
325 	return ok;
326 }
327 
328 /*
329  * extract files using the info the "local file headers"
330  */
331 static int
sunzip(Biobuf * bin)332 sunzip(Biobuf *bin)
333 {
334 	int eok;
335 
336 	for(;;){
337 		eok = unzipEntry(bin, nil);
338 		if(eok == 0)
339 			return 1;
340 		if(eok < 0)
341 			return 0;
342 	}
343 }
344 
345 static int mkdirs(char *);
346 
347 /*
348  * if any directories leading up to path don't exist, create them.
349  * modifies but restores path.
350  */
351 static int
mkpdirs(char * path)352 mkpdirs(char *path)
353 {
354 	int rv = 0;
355 	char *sl = strrchr(path, '/');
356 print("%s\n", path);
357 	if (sl != nil) {
358 		*sl = '\0';
359 		rv = mkdirs(path);
360 		*sl = '/';
361 	}
362 	return rv;
363 }
364 
365 /*
366  * if path or any directories leading up to it don't exist, create them.
367  * modifies but restores path.
368  */
369 static int
mkdirs(char * path)370 mkdirs(char *path)
371 {
372 	int fd;
373 
374 	if (access(path, AEXIST) >= 0)
375 		return 0;
376 
377 	/* make presumed-missing intermediate directories */
378 	if (mkpdirs(path) < 0)
379 		return -1;
380 
381 	/* make final directory */
382 	fd = create(path, OREAD, 0755|DMDIR);
383 	if (fd < 0)
384 		/*
385 		 * we may have lost a race; if the directory now exists,
386 		 * it's okay.
387 		 */
388 		return access(path, AEXIST) < 0? -1: 0;
389 	close(fd);
390 	return 0;
391 }
392 
393 
394 /*
395  * extracts a single entry from a zip file
396  * czh is the optional corresponding central directory entry
397  */
398 static int
unzipEntry(Biobuf * bin,ZipHead * czh)399 unzipEntry(Biobuf *bin, ZipHead *czh)
400 {
401 	Dir *d;
402 	ZipHead zh;
403 	char *p;
404 	vlong off;
405 	int fd, isdir, ok, err;
406 
407 	zh.file = nil;
408 	if(setjmp(zjmp)){
409 		delfile = nil;
410 		free(zh.file);
411 		return -1;
412 	}
413 
414 	memset(&zh, 0, sizeof(zh));
415 	if(!header(bin, &zh))
416 		return 0;
417 
418 	ok = 1;
419 	isdir = 0;
420 
421 	fd = -1;
422 	if(wantFile(zh.file)){
423 		if(verbose)
424 			fprint(2, "extracting %s\n", zh.file);
425 
426 		if(czh != nil && czh->extos == ZDos){
427 			isdir = czh->eattr & ZDDir;
428 			if(isdir && zh.uncsize != 0)
429 				fprint(2, "unzip: ignoring directory data for %s\n", zh.file);
430 		}
431 		if(zh.meth == 0 && zh.uncsize == 0){
432 			p = strchr(zh.file, '\0');
433 			if(p > zh.file && p[-1] == '/')
434 				isdir = 1;
435 		}
436 
437 		if(stdout){
438 			if(ok && !isdir)
439 				fd = 1;
440 		}else if(isdir){
441 			fd = create(zh.file, OREAD, DMDIR | 0775);
442 			if(fd < 0){
443 				d = dirstat(zh.file);
444 				if(d == nil || (d->mode & DMDIR) != DMDIR){
445 					fprint(2, "unzip: can't create directory %s: %r\n", zh.file);
446 					ok = 0;
447 				}
448 				free(d);
449 			}
450 		}else if(ok){
451 			if(autodir)
452 				mkpdirs(zh.file);
453 			fd = create(zh.file, OWRITE, 0664);
454 			if(fd < 0){
455 				fprint(2, "unzip: can't create %s: %r\n", zh.file);
456 				ok = 0;
457 			}else
458 				delfile = zh.file;
459 		}
460 	}
461 
462 	wlen = 0;
463 	rlen = 0;
464 	crc = 0;
465 	wbad = 0;
466 
467 	if(zh.meth == 0){
468 		if(!copyout(fd, bin, zh.csize))
469 			error("copying data for %s failed: %r", zh.file);
470 	}else if(zh.meth == 8){
471 		off = Boffset(bin);
472 		err = inflate((void*)fd, crcwrite, bin, (int(*)(void*))Bgetc);
473 		if(err != FlateOk)
474 			error("inflate failed: %s", flateerr(err));
475 		rlen = Boffset(bin) - off;
476 	}else
477 		error("can't handle compression method %d for %s", zh.meth, zh.file);
478 
479 	trailer(bin, &zh);
480 
481 	if(zh.crc != crc)
482 		error("crc mismatch for %s", zh.file);
483 	if(zh.uncsize != wlen)
484 		error("output size mismatch for %s", zh.file);
485 	if(zh.csize != rlen)
486 		error("input size mismatch for %s", zh.file);
487 
488 	delfile = nil;
489 	free(zh.file);
490 
491 	if(fd >= 0 && !stdout){
492 		if(settimes){
493 			d = dirfstat(fd);
494 			if(d != nil){
495 				d->mtime = msdos2time(zh.modtime, zh.moddate);
496 				if(d->mtime)
497 					dirfwstat(fd, d);
498 			}
499 		}
500 		close(fd);
501 	}
502 
503 	return ok;
504 }
505 
506 static int
wantFile(char * file)507 wantFile(char *file)
508 {
509 	int i, n;
510 
511 	if(nwant == 0)
512 		return 1;
513 	for(i = 0; i < nwant; i++){
514 		if(strcmp(want[i], file) == 0)
515 			return 1;
516 		n = strlen(want[i]);
517 		if(strncmp(want[i], file, n) == 0 && file[n] == '/')
518 			return 1;
519 	}
520 	return 0;
521 }
522 
523 /*
524  * find the start of the central directory
525  * returns the number of entries in the directory,
526  * or -1 if there was an error
527  */
528 static int
findCDir(Biobuf * bin,char * file)529 findCDir(Biobuf *bin, char *file)
530 {
531 	vlong ecoff;
532 	long off, size, m;
533 	int entries, zclen, dn, ds, de;
534 
535 	ecoff = Bseek(bin, -ZECHeadSize, 2);
536 	if(ecoff < 0){
537 		fprint(2, "unzip: can't seek to contents of %s\n", file);
538 		longjmp(seekjmp, 1);
539 		return -1;
540 	}
541 	if(setjmp(zjmp))
542 		return -1;
543 
544 	if((m=get4(bin)) != ZECHeader){
545 		fprint(2, "unzip: bad magic number for table of contents of %s: %#.8lx\n", file, m);
546 		longjmp(seekjmp, 1);
547 		return -1;
548 	}
549 	dn = get2(bin);
550 	ds = get2(bin);
551 	de = get2(bin);
552 	entries = get2(bin);
553 	size = get4(bin);
554 	off = get4(bin);
555 	zclen = get2(bin);
556 	while(zclen-- > 0)
557 		get1(bin);
558 
559 	if(verbose > 1){
560 		print("table starts at %ld for %ld bytes\n", off, size);
561 		if(ecoff - size != off)
562 			print("\ttable should start at %lld-%ld=%lld\n", ecoff, size, ecoff-size);
563 		if(dn || ds || de != entries)
564 			print("\tcurrent disk=%d start disk=%d table entries on this disk=%d\n", dn, ds, de);
565 	}
566 
567 	if(Bseek(bin, off, 0) != off){
568 		fprint(2, "unzip: can't seek to start of contents of %s\n", file);
569 		longjmp(seekjmp, 1);
570 		return -1;
571 	}
572 
573 	return entries;
574 }
575 
576 static int
cheader(Biobuf * bin,ZipHead * zh)577 cheader(Biobuf *bin, ZipHead *zh)
578 {
579 	ulong v;
580 	int flen, xlen, fclen;
581 
582 	v = get4(bin);
583 	if(v != ZCHeader){
584 		if(v == ZECHeader)
585 			return 0;
586 		error("bad magic number %lux", v);
587 	}
588 	zh->madevers = get1(bin);
589 	zh->madeos = get1(bin);
590 	zh->extvers = get1(bin);
591 	zh->extos = get1(bin);
592 	zh->flags = get2(bin);
593 	zh->meth = get2(bin);
594 	zh->modtime = get2(bin);
595 	zh->moddate = get2(bin);
596 	zh->crc = get4(bin);
597 	zh->csize = get4(bin);
598 	zh->uncsize = get4(bin);
599 	flen = get2(bin);
600 	xlen = get2(bin);
601 	fclen = get2(bin);
602 	get2(bin);		/* disk number start */
603 	zh->iattr = get2(bin);
604 	zh->eattr = get4(bin);
605 	zh->off = get4(bin);
606 
607 	zh->file = getname(bin, flen);
608 
609 	while(xlen-- > 0)
610 		get1(bin);
611 
612 	while(fclen-- > 0)
613 		get1(bin);
614 
615 	return 1;
616 }
617 
618 static int
header(Biobuf * bin,ZipHead * zh)619 header(Biobuf *bin, ZipHead *zh)
620 {
621 	ulong v;
622 	int flen, xlen;
623 
624 	v = get4(bin);
625 	if(v != ZHeader){
626 		if(v == ZCHeader)
627 			return 0;
628 		error("bad magic number %lux at %lld", v, Boffset(bin)-4);
629 	}
630 	zh->extvers = get1(bin);
631 	zh->extos = get1(bin);
632 	zh->flags = get2(bin);
633 	zh->meth = get2(bin);
634 	zh->modtime = get2(bin);
635 	zh->moddate = get2(bin);
636 	zh->crc = get4(bin);
637 	zh->csize = get4(bin);
638 	zh->uncsize = get4(bin);
639 	flen = get2(bin);
640 	xlen = get2(bin);
641 
642 	zh->file = getname(bin, flen);
643 
644 	while(xlen-- > 0)
645 		get1(bin);
646 
647 	return 1;
648 }
649 
650 static void
trailer(Biobuf * bin,ZipHead * zh)651 trailer(Biobuf *bin, ZipHead *zh)
652 {
653 	if(zh->flags & ZTrailInfo){
654 		zh->crc = get4(bin);
655 		zh->csize = get4(bin);
656 		zh->uncsize = get4(bin);
657 	}
658 }
659 
660 static char*
getname(Biobuf * bin,int len)661 getname(Biobuf *bin, int len)
662 {
663 	char *s;
664 	int i, c;
665 
666 	s = emalloc(len + 1);
667 	for(i = 0; i < len; i++){
668 		c = get1(bin);
669 		if(lower)
670 			c = tolower(c);
671 		s[i] = c;
672 	}
673 	s[i] = '\0';
674 	return s;
675 }
676 
677 static int
crcwrite(void * out,void * buf,int n)678 crcwrite(void *out, void *buf, int n)
679 {
680 	int fd, nw;
681 
682 	wlen += n;
683 	crc = blockcrc(crctab, crc, buf, n);
684 	fd = (int)(uintptr)out;
685 	if(fd < 0)
686 		return n;
687 	nw = write(fd, buf, n);
688 	if(nw != n)
689 		wbad = 1;
690 	return nw;
691 }
692 
693 static int
copyout(int ofd,Biobuf * bin,long len)694 copyout(int ofd, Biobuf *bin, long len)
695 {
696 	char buf[BufSize];
697 	int n;
698 
699 	for(; len > 0; len -= n){
700 		n = len;
701 		if(n > BufSize)
702 			n = BufSize;
703 		n = Bread(bin, buf, n);
704 		if(n <= 0)
705 			return 0;
706 		rlen += n;
707 		if(crcwrite((void*)ofd, buf, n) != n)
708 			return 0;
709 	}
710 	return 1;
711 }
712 
713 static ulong
get4(Biobuf * b)714 get4(Biobuf *b)
715 {
716 	ulong v;
717 	int i, c;
718 
719 	v = 0;
720 	for(i = 0; i < 4; i++){
721 		c = Bgetc(b);
722 		if(c < 0)
723 			error("unexpected eof reading file information");
724 		v |= c << (i * 8);
725 	}
726 	return v;
727 }
728 
729 static int
get2(Biobuf * b)730 get2(Biobuf *b)
731 {
732 	int i, c, v;
733 
734 	v = 0;
735 	for(i = 0; i < 2; i++){
736 		c = Bgetc(b);
737 		if(c < 0)
738 			error("unexpected eof reading file information");
739 		v |= c << (i * 8);
740 	}
741 	return v;
742 }
743 
744 static int
get1(Biobuf * b)745 get1(Biobuf *b)
746 {
747 	int c;
748 
749 	c = Bgetc(b);
750 	if(c < 0)
751 		error("unexpected eof reading file information");
752 	return c;
753 }
754 
755 static long
msdos2time(int time,int date)756 msdos2time(int time, int date)
757 {
758 	Tm tm;
759 
760 	tm.hour = time >> 11;
761 	tm.min = (time >> 5) & 63;
762 	tm.sec = (time & 31) << 1;
763 	tm.year = 80 + (date >> 9);
764 	tm.mon = ((date >> 5) & 15) - 1;
765 	tm.mday = date & 31;
766 	tm.zone[0] = '\0';
767 	tm.yday = 0;
768 
769 	return tm2sec(&tm);
770 }
771 
772 static void*
emalloc(ulong n)773 emalloc(ulong n)
774 {
775 	void *p;
776 
777 	p = malloc(n);
778 	if(p == nil)
779 		sysfatal("out of memory");
780 	return p;
781 }
782 
783 static void
error(char * fmt,...)784 error(char *fmt, ...)
785 {
786 	va_list arg;
787 
788 	fprint(2, "unzip: ");
789 	va_start(arg, fmt);
790 	vfprint(2, fmt, arg);
791 	va_end(arg);
792 	fprint(2, "\n");
793 
794 	if(delfile != nil){
795 		fprint(2, "unzip: removing output file %s\n", delfile);
796 		remove(delfile);
797 		delfile = nil;
798 	}
799 
800 	longjmp(zjmp, 1);
801 }
802