xref: /plan9/sys/src/cmd/gzip/unzip.c (revision ec59a3ddbfceee0efe34584c2c9981a5e5ff1ec4)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <flate.h>
5 #include "zip.h"
6 
/* size in bytes of the stack buffer copyout moves data through */
enum { BufSize = 4096 };
11 
12 static	int	cheader(Biobuf *bin, ZipHead *zh);
13 static	int	copyout(int ofd, Biobuf *bin, long len);
14 static	int	crcwrite(void *ofd, void *buf, int n);
15 static	int	findCDir(Biobuf *bin, char *file);
16 static	int	get1(Biobuf *b);
17 static	int	get2(Biobuf *b);
18 static	ulong	get4(Biobuf *b);
19 static	char	*getname(Biobuf *b, int len);
20 static	int	header(Biobuf *bin, ZipHead *zh);
21 static	long	msdos2time(int time, int date);
22 static	int	sunzip(Biobuf *bin);
23 static	int	sunztable(Biobuf *bin);
24 static	void	trailer(Biobuf *bin, ZipHead *zh);
25 static	int	unzip(Biobuf *bin, char *file);
26 static	int	unzipEntry(Biobuf *bin, ZipHead *czh);
27 static	int	unztable(Biobuf *bin, char *file);
28 static	int	wantFile(char *file);
29 
30 static	void	*emalloc(ulong);
31 static	void	error(char*, ...);
32 #pragma	varargck	argpos	error	1
33 
34 static	Biobuf	bin;
35 static	ulong	crc;
36 static	ulong	*crctab;
37 static	int	debug;
38 static	char	*delfile;
39 static	int	lower;
40 static	int	nwant;
41 static	ulong	rlen;
42 static	int	settimes;
43 static	int	stdout;
44 static	int	verbose;
45 static	char	**want;
46 static	int	wbad;
47 static	ulong	wlen;
48 static	jmp_buf	zjmp;
49 static	jmp_buf	seekjmp;
50 
/*
 * print the command synopsis and exit with status "usage".
 * the option list mirrors the cases main's ARGBEGIN switch accepts.
 */
static void
usage(void)
{
	fprint(2, "usage: unzip [-cistTvD] [-f zipfile] [file ...]\n");
	exits("usage");
}
57 
58 void
59 main(int argc, char *argv[])
60 {
61 	char *zfile;
62 	int fd, ok, table, stream;
63 
64 	table = 0;
65 	stream = 0;
66 	zfile = nil;
67 	ARGBEGIN{
68 	case 'D':
69 		debug++;
70 		break;
71 	case 'c':
72 		stdout++;
73 		break;
74 	case 'i':
75 		lower++;
76 		break;
77 	case 'f':
78 		zfile = ARGF();
79 		if(zfile == nil)
80 			usage();
81 		break;
82 	case 's':
83 		stream++;
84 		break;
85 	case 't':
86 		table++;
87 		break;
88 	case 'T':
89 		settimes++;
90 		break;
91 	case 'v':
92 		verbose++;
93 		break;
94 	default:
95 		usage();
96 		break;
97 	}ARGEND
98 
99 	nwant = argc;
100 	want = argv;
101 
102 	crctab = mkcrctab(ZCrcPoly);
103 	ok = inflateinit();
104 	if(ok != FlateOk)
105 		sysfatal("inflateinit failed: %s\n", flateerr(ok));
106 
107 	if(zfile == nil){
108 		Binit(&bin, 0, OREAD);
109 		zfile = "<stdin>";
110 	}else{
111 		fd = open(zfile, OREAD);
112 		if(fd < 0)
113 			sysfatal("can't open %s: %r", zfile);
114 		Binit(&bin, fd, OREAD);
115 	}
116 
117 	if(setjmp(seekjmp)){
118 		fprint(2, "trying to re-run assuming -s\n");
119 		stream = 1;
120 		Bseek(&bin, 0, 0);
121 	}
122 
123 	if(table){
124 		if(stream)
125 			ok = sunztable(&bin);
126 		else
127 			ok = unztable(&bin, zfile);
128 	}else{
129 		if(stream)
130 			ok = sunzip(&bin);
131 		else
132 			ok = unzip(&bin, zfile);
133 	}
134 
135 	exits(ok ? nil: "errors");
136 }
137 
138 /*
139  * print the table of contents from the "central directory structure"
140  */
141 static int
142 unztable(Biobuf *bin, char *file)
143 {
144 	ZipHead zh;
145 	int entries;
146 
147 	entries = findCDir(bin, file);
148 	if(entries < 0)
149 		return 0;
150 
151 	if(verbose > 1)
152 		print("%d items in the archive\n", entries);
153 	while(entries-- > 0){
154 		if(setjmp(zjmp)){
155 			free(zh.file);
156 			return 0;
157 		}
158 
159 		memset(&zh, 0, sizeof(zh));
160 		if(!cheader(bin, &zh))
161 			return 1;
162 
163 		if(wantFile(zh.file)){
164 			if(verbose)
165 				print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
166 			else
167 				print("%s\n", zh.file);
168 
169 			if(verbose > 1){
170 				print("\tmade by os %d vers %d.%d\n", zh.madeos, zh.madevers/10, zh.madevers % 10);
171 				print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers/10, zh.extvers % 10);
172 				print("\tflags %x\n", zh.flags);
173 				print("\tmethod %d\n", zh.meth);
174 				print("\tmod time %d\n", zh.modtime);
175 				print("\tmod date %d\n", zh.moddate);
176 				print("\tcrc %lux\n", zh.crc);
177 				print("\tcompressed size %lud\n", zh.csize);
178 				print("\tuncompressed size %lud\n", zh.uncsize);
179 				print("\tinternal attributes %ux\n", zh.iattr);
180 				print("\texternal attributes %lux\n", zh.eattr);
181 				print("\tstarts at %ld\n", zh.off);
182 			}
183 		}
184 
185 		free(zh.file);
186 		zh.file = nil;
187 	}
188 
189 	return 1;
190 }
191 
192 /*
193  * print the "local file header" table of contents
194  */
195 static int
196 sunztable(Biobuf *bin)
197 {
198 	ZipHead zh;
199 	vlong off;
200 	ulong hcrc, hcsize, huncsize;
201 	int ok, err;
202 
203 	ok = 1;
204 	for(;;){
205 		if(setjmp(zjmp)){
206 			free(zh.file);
207 			return 0;
208 		}
209 
210 		memset(&zh, 0, sizeof(zh));
211 		if(!header(bin, &zh))
212 			return ok;
213 
214 		hcrc = zh.crc;
215 		hcsize = zh.csize;
216 		huncsize = zh.uncsize;
217 
218 		wlen = 0;
219 		rlen = 0;
220 		crc = 0;
221 		wbad = 0;
222 
223 		if(zh.meth == 0){
224 			if(!copyout(-1, bin, zh.csize))
225 				error("reading data for %s failed: %r", zh.file);
226 		}else if(zh.meth == 8){
227 			off = Boffset(bin);
228 			err = inflate((void*)-1, crcwrite, bin, (int(*)(void*))Bgetc);
229 			if(err != FlateOk)
230 				error("inflate %s failed: %s", zh.file, flateerr(err));
231 			rlen = Boffset(bin) - off;
232 		}else
233 			error("can't handle compression method %d for %s", zh.meth, zh.file);
234 
235 		trailer(bin, &zh);
236 
237 		if(wantFile(zh.file)){
238 			if(verbose)
239 				print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
240 			else
241 				print("%s\n", zh.file);
242 
243 			if(verbose > 1){
244 				print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers / 10, zh.extvers % 10);
245 				print("\tflags %x\n", zh.flags);
246 				print("\tmethod %d\n", zh.meth);
247 				print("\tmod time %d\n", zh.modtime);
248 				print("\tmod date %d\n", zh.moddate);
249 				print("\tcrc %lux\n", zh.crc);
250 				print("\tcompressed size %lud\n", zh.csize);
251 				print("\tuncompressed size %lud\n", zh.uncsize);
252 				if((zh.flags & ZTrailInfo) && (hcrc || hcsize || huncsize)){
253 					print("\theader crc %lux\n", zh.crc);
254 					print("\theader compressed size %lud\n", zh.csize);
255 					print("\theader uncompressed size %lud\n", zh.uncsize);
256 				}
257 			}
258 		}
259 
260 		if(zh.crc != crc)
261 			error("crc mismatch for %s", zh.file);
262 		if(zh.uncsize != wlen)
263 			error("output size mismatch for %s", zh.file);
264 		if(zh.csize != rlen)
265 			error("input size mismatch for %s", zh.file);
266 
267 
268 		free(zh.file);
269 		zh.file = nil;
270 	}
271 }
272 
273 /*
274  * extract files using the info in the central directory structure
275  */
276 static int
277 unzip(Biobuf *bin, char *file)
278 {
279 	ZipHead zh;
280 	vlong off;
281 	int ok, eok, entries;
282 
283 	entries = findCDir(bin, file);
284 	if(entries < 0)
285 		return 0;
286 
287 	ok = 1;
288 	while(entries-- > 0){
289 		if(setjmp(zjmp)){
290 			free(zh.file);
291 			return 0;
292 		}
293 		memset(&zh, 0, sizeof(zh));
294 		if(!cheader(bin, &zh))
295 			return ok;
296 
297 
298 		off = Boffset(bin);
299 		if(wantFile(zh.file)){
300 			if(Bseek(bin, zh.off, 0) < 0){
301 				fprint(2, "unzip: can't seek to start of %s, skipping\n", zh.file);
302 				ok = 0;
303 			}else{
304 				eok = unzipEntry(bin, &zh);
305 				if(eok <= 0){
306 					fprint(2, "unzip: skipping %s\n", zh.file);
307 					ok = 0;
308 				}
309 			}
310 		}
311 
312 		free(zh.file);
313 		zh.file = nil;
314 
315 		if(Bseek(bin, off, 0) < 0){
316 			fprint(2, "unzip: can't seek to start of next entry, terminating extraction\n");
317 			return 0;
318 		}
319 	}
320 
321 	return ok;
322 }
323 
324 /*
325  * extract files using the info the "local file headers"
326  */
327 static int
328 sunzip(Biobuf *bin)
329 {
330 	int eok;
331 
332 	for(;;){
333 		eok = unzipEntry(bin, nil);
334 		if(eok == 0)
335 			return 1;
336 		if(eok < 0)
337 			return 0;
338 	}
339 }
340 
341 /*
342  * extracts a single entry from a zip file
343  * czh is the optional corresponding central directory entry
344  */
345 static int
346 unzipEntry(Biobuf *bin, ZipHead *czh)
347 {
348 	Dir *d;
349 	ZipHead zh;
350 	char *p;
351 	vlong off;
352 	int fd, isdir, ok, err;
353 
354 	zh.file = nil;
355 	if(setjmp(zjmp)){
356 		delfile = nil;
357 		free(zh.file);
358 		return -1;
359 	}
360 
361 	memset(&zh, 0, sizeof(zh));
362 	if(!header(bin, &zh))
363 		return 0;
364 
365 	ok = 1;
366 	isdir = 0;
367 
368 	fd = -1;
369 	if(wantFile(zh.file)){
370 		if(verbose)
371 			fprint(2, "extracting %s\n", zh.file);
372 
373 		if(czh != nil && czh->extos == ZDos){
374 			isdir = czh->eattr & ZDDir;
375 			if(isdir && zh.uncsize != 0)
376 				fprint(2, "unzip: ignoring directory data for %s\n", zh.file);
377 		}
378 		if(zh.meth == 0 && zh.uncsize == 0){
379 			p = strchr(zh.file, '\0');
380 			if(p > zh.file && p[-1] == '/')
381 				isdir = 1;
382 		}
383 
384 		if(stdout){
385 			if(ok && !isdir)
386 				fd = 1;
387 		}else if(isdir){
388 			fd = create(zh.file, OREAD, DMDIR | 0775);
389 			if(fd < 0){
390 				d = dirstat(zh.file);
391 				if(d == nil || (d->mode & DMDIR) != DMDIR){
392 					fprint(2, "unzip: can't create directory %s: %r\n", zh.file);
393 					ok = 0;
394 				}
395 				free(d);
396 			}
397 		}else if(ok){
398 			fd = create(zh.file, OWRITE, 0664);
399 			if(fd < 0){
400 				fprint(2, "unzip: can't create %s: %r\n", zh.file);
401 				ok = 0;
402 			}else
403 				delfile = zh.file;
404 		}
405 	}
406 
407 	wlen = 0;
408 	rlen = 0;
409 	crc = 0;
410 	wbad = 0;
411 
412 	if(zh.meth == 0){
413 		if(!copyout(fd, bin, zh.csize))
414 			error("copying data for %s failed: %r", zh.file);
415 	}else if(zh.meth == 8){
416 		off = Boffset(bin);
417 		err = inflate((void*)fd, crcwrite, bin, (int(*)(void*))Bgetc);
418 		if(err != FlateOk)
419 			error("inflate failed: %s", flateerr(err));
420 		rlen = Boffset(bin) - off;
421 	}else
422 		error("can't handle compression method %d for %s", zh.meth, zh.file);
423 
424 	trailer(bin, &zh);
425 
426 	if(zh.crc != crc)
427 		error("crc mismatch for %s", zh.file);
428 	if(zh.uncsize != wlen)
429 		error("output size mismatch for %s", zh.file);
430 	if(zh.csize != rlen)
431 		error("input size mismatch for %s", zh.file);
432 
433 	delfile = nil;
434 	free(zh.file);
435 
436 	if(fd >= 0 && !stdout){
437 		if(settimes){
438 			d = dirfstat(fd);
439 			if(d != nil){
440 				d->mtime = msdos2time(zh.modtime, zh.moddate);
441 				if(d->mtime)
442 					dirfwstat(fd, d);
443 			}
444 		}
445 		close(fd);
446 	}
447 
448 	return ok;
449 }
450 
451 static int
452 wantFile(char *file)
453 {
454 	int i, n;
455 
456 	if(nwant == 0)
457 		return 1;
458 	for(i = 0; i < nwant; i++){
459 		if(strcmp(want[i], file) == 0)
460 			return 1;
461 		n = strlen(want[i]);
462 		if(strncmp(want[i], file, n) == 0 && file[n] == '/')
463 			return 1;
464 	}
465 	return 0;
466 }
467 
468 /*
469  * find the start of the central directory
470  * returns the number of entries in the directory,
471  * or -1 if there was an error
472  */
473 static int
474 findCDir(Biobuf *bin, char *file)
475 {
476 	vlong ecoff;
477 	long off, size, m;
478 	int entries, zclen, dn, ds, de;
479 
480 	ecoff = Bseek(bin, -ZECHeadSize, 2);
481 	if(ecoff < 0){
482 		fprint(2, "unzip: can't seek to contents of %s\n", file);
483 		longjmp(seekjmp, 1);
484 		return -1;
485 	}
486 	if(setjmp(zjmp))
487 		return -1;
488 
489 	if((m=get4(bin)) != ZECHeader){
490 		fprint(2, "unzip: bad magic number for table of contents of %s: %#.8lx\n", file, m);
491 		longjmp(seekjmp, 1);
492 		return -1;
493 	}
494 	dn = get2(bin);
495 	ds = get2(bin);
496 	de = get2(bin);
497 	entries = get2(bin);
498 	size = get4(bin);
499 	off = get4(bin);
500 	zclen = get2(bin);
501 	while(zclen-- > 0)
502 		get1(bin);
503 
504 	if(verbose > 1){
505 		print("table starts at %ld for %ld bytes\n", off, size);
506 		if(ecoff - size != off)
507 			print("\ttable should start at %lld-%ld=%lld\n", ecoff, size, ecoff-size);
508 		if(dn || ds || de != entries)
509 			print("\tcurrent disk=%d start disk=%d table entries on this disk=%d\n", dn, ds, de);
510 	}
511 
512 	if(Bseek(bin, off, 0) != off){
513 		fprint(2, "unzip: can't seek to start of contents of %s\n", file);
514 		longjmp(seekjmp, 1);
515 		return -1;
516 	}
517 
518 	return entries;
519 }
520 
521 static int
522 cheader(Biobuf *bin, ZipHead *zh)
523 {
524 	ulong v;
525 	int flen, xlen, fclen;
526 
527 	v = get4(bin);
528 	if(v != ZCHeader){
529 		if(v == ZECHeader)
530 			return 0;
531 		error("bad magic number %lux", v);
532 	}
533 	zh->madevers = get1(bin);
534 	zh->madeos = get1(bin);
535 	zh->extvers = get1(bin);
536 	zh->extos = get1(bin);
537 	zh->flags = get2(bin);
538 	zh->meth = get2(bin);
539 	zh->modtime = get2(bin);
540 	zh->moddate = get2(bin);
541 	zh->crc = get4(bin);
542 	zh->csize = get4(bin);
543 	zh->uncsize = get4(bin);
544 	flen = get2(bin);
545 	xlen = get2(bin);
546 	fclen = get2(bin);
547 	get2(bin);		/* disk number start */
548 	zh->iattr = get2(bin);
549 	zh->eattr = get4(bin);
550 	zh->off = get4(bin);
551 
552 	zh->file = getname(bin, flen);
553 
554 	while(xlen-- > 0)
555 		get1(bin);
556 
557 	while(fclen-- > 0)
558 		get1(bin);
559 
560 	return 1;
561 }
562 
563 static int
564 header(Biobuf *bin, ZipHead *zh)
565 {
566 	ulong v;
567 	int flen, xlen;
568 
569 	v = get4(bin);
570 	if(v != ZHeader){
571 		if(v == ZCHeader)
572 			return 0;
573 		error("bad magic number %lux at %lld", v, Boffset(bin)-4);
574 	}
575 	zh->extvers = get1(bin);
576 	zh->extos = get1(bin);
577 	zh->flags = get2(bin);
578 	zh->meth = get2(bin);
579 	zh->modtime = get2(bin);
580 	zh->moddate = get2(bin);
581 	zh->crc = get4(bin);
582 	zh->csize = get4(bin);
583 	zh->uncsize = get4(bin);
584 	flen = get2(bin);
585 	xlen = get2(bin);
586 
587 	zh->file = getname(bin, flen);
588 
589 	while(xlen-- > 0)
590 		get1(bin);
591 
592 	return 1;
593 }
594 
595 static void
596 trailer(Biobuf *bin, ZipHead *zh)
597 {
598 	if(zh->flags & ZTrailInfo){
599 		zh->crc = get4(bin);
600 		zh->csize = get4(bin);
601 		zh->uncsize = get4(bin);
602 	}
603 }
604 
605 static char*
606 getname(Biobuf *bin, int len)
607 {
608 	char *s;
609 	int i, c;
610 
611 	s = emalloc(len + 1);
612 	for(i = 0; i < len; i++){
613 		c = get1(bin);
614 		if(lower)
615 			c = tolower(c);
616 		s[i] = c;
617 	}
618 	s[i] = '\0';
619 	return s;
620 }
621 
622 static int
623 crcwrite(void *out, void *buf, int n)
624 {
625 	int fd, nw;
626 
627 	wlen += n;
628 	crc = blockcrc(crctab, crc, buf, n);
629 	fd = (int)(uintptr)out;
630 	if(fd < 0)
631 		return n;
632 	nw = write(fd, buf, n);
633 	if(nw != n)
634 		wbad = 1;
635 	return nw;
636 }
637 
638 static int
639 copyout(int ofd, Biobuf *bin, long len)
640 {
641 	char buf[BufSize];
642 	int n;
643 
644 	for(; len > 0; len -= n){
645 		n = len;
646 		if(n > BufSize)
647 			n = BufSize;
648 		n = Bread(bin, buf, n);
649 		if(n <= 0)
650 			return 0;
651 		rlen += n;
652 		if(crcwrite((void*)ofd, buf, n) != n)
653 			return 0;
654 	}
655 	return 1;
656 }
657 
658 static ulong
659 get4(Biobuf *b)
660 {
661 	ulong v;
662 	int i, c;
663 
664 	v = 0;
665 	for(i = 0; i < 4; i++){
666 		c = Bgetc(b);
667 		if(c < 0)
668 			error("unexpected eof reading file information");
669 		v |= c << (i * 8);
670 	}
671 	return v;
672 }
673 
674 static int
675 get2(Biobuf *b)
676 {
677 	int i, c, v;
678 
679 	v = 0;
680 	for(i = 0; i < 2; i++){
681 		c = Bgetc(b);
682 		if(c < 0)
683 			error("unexpected eof reading file information");
684 		v |= c << (i * 8);
685 	}
686 	return v;
687 }
688 
689 static int
690 get1(Biobuf *b)
691 {
692 	int c;
693 
694 	c = Bgetc(b);
695 	if(c < 0)
696 		error("unexpected eof reading file information");
697 	return c;
698 }
699 
700 static long
701 msdos2time(int time, int date)
702 {
703 	Tm tm;
704 
705 	tm.hour = time >> 11;
706 	tm.min = (time >> 5) & 63;
707 	tm.sec = (time & 31) << 1;
708 	tm.year = 80 + (date >> 9);
709 	tm.mon = ((date >> 5) & 15) - 1;
710 	tm.mday = date & 31;
711 	tm.zone[0] = '\0';
712 	tm.yday = 0;
713 
714 	return tm2sec(&tm);
715 }
716 
717 static void*
718 emalloc(ulong n)
719 {
720 	void *p;
721 
722 	p = malloc(n);
723 	if(p == nil)
724 		sysfatal("out of memory");
725 	return p;
726 }
727 
728 static void
729 error(char *fmt, ...)
730 {
731 	va_list arg;
732 
733 	fprint(2, "unzip: ");
734 	va_start(arg, fmt);
735 	vfprint(2, fmt, arg);
736 	va_end(arg);
737 	fprint(2, "\n");
738 
739 	if(delfile != nil){
740 		fprint(2, "unzip: removing output file %s\n", delfile);
741 		remove(delfile);
742 		delfile = nil;
743 	}
744 
745 	longjmp(zjmp, 1);
746 }
747