xref: /plan9/sys/src/cmd/gzip/unzip.c (revision ff8c3af2f44d95267f67219afa20ba82ff6cf7e4)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <flate.h>
5 #include "zip.h"
6 
7 enum
8 {
9 	BufSize	= 4096	/* size of the stack buffer copyout fills with each Bread */
10 };
11 
12 static	int	cheader(Biobuf *bin, ZipHead *zh);
13 static	int	copyout(int ofd, Biobuf *bin, long len);
14 static	int	crcwrite(void *ofd, void *buf, int n);
15 static	int	findCDir(Biobuf *bin, char *file);
16 static	int	get1(Biobuf *b);
17 static	int	get2(Biobuf *b);
18 static	ulong	get4(Biobuf *b);
19 static	char	*getname(Biobuf *b, int len);
20 static	int	header(Biobuf *bin, ZipHead *zh);
21 static	long	msdos2time(int time, int date);
22 static	int	sunzip(Biobuf *bin);
23 static	int	sunztable(Biobuf *bin);
24 static	void	trailer(Biobuf *bin, ZipHead *zh);
25 static	int	unzip(Biobuf *bin, char *file);
26 static	int	unzipEntry(Biobuf *bin, ZipHead *czh);
27 static	int	unztable(Biobuf *bin, char *file);
28 static	int	wantFile(char *file);
29 
30 static	void	*emalloc(ulong);
31 static	void	error(char*, ...);
32 #pragma	varargck	argpos	error	1
33 
34 static	Biobuf	bin;	/* buffered input for the archive; set up in main */
35 static	ulong	crc;	/* running crc of the current entry's output, updated by crcwrite */
36 static	ulong	*crctab;	/* crc table built by mkcrctab in main */
37 static	int	debug;	/* -D count; not read by the code visible here */
38 static	char	*delfile;	/* output file that error() removes, or nil */
39 static	int	lower;	/* -i: getname lower-cases entry names */
40 static	int	nwant;	/* number of names in want */
41 static	ulong	rlen;	/* input bytes consumed for the current entry */
42 static	int	settimes;	/* -T: set the mtime of extracted files from the archive */
43 static	int	stdout;	/* -c: write file contents to fd 1 instead of creating files */
44 static	int	verbose;	/* -v count */
45 static	char	**want;	/* names left on the command line; none means every entry */
46 static	int	wbad;	/* set by crcwrite after a short write */
47 static	ulong	wlen;	/* output bytes produced for the current entry */
48 static	jmp_buf	zjmp;	/* where error() longjmps to */
49 
/*
 * print the command-line synopsis on standard error and exit
 * with status "usage".  the synopsis lists every option that
 * main accepts, apart from the -D debugging flag.
 */
static void
usage(void)
{
	fprint(2, "usage: unzip [-cistTv] [-f zipfile] [file ...]\n");
	exits("usage");
}
56 
57 void
58 main(int argc, char *argv[])
59 {
60 	char *zfile;
61 	int fd, ok, table, stream;
62 
63 	table = 0;
64 	stream = 0;
65 	zfile = nil;
66 	ARGBEGIN{
67 	case 'D':
68 		debug++;
69 		break;
70 	case 'c':
71 		stdout++;
72 		break;
73 	case 'i':
74 		lower++;
75 		break;
76 	case 'f':
77 		zfile = ARGF();
78 		if(zfile == nil)
79 			usage();
80 		break;
81 	case 's':
82 		stream++;
83 		break;
84 	case 't':
85 		table++;
86 		break;
87 	case 'T':
88 		settimes++;
89 		break;
90 	case 'v':
91 		verbose++;
92 		break;
93 	default:
94 		usage();
95 		break;
96 	}ARGEND
97 
98 	nwant = argc;
99 	want = argv;
100 
101 	crctab = mkcrctab(ZCrcPoly);
102 	ok = inflateinit();
103 	if(ok != FlateOk)
104 		sysfatal("inflateinit failed: %s\n", flateerr(ok));
105 
106 	if(zfile == nil){
107 		Binit(&bin, 0, OREAD);
108 		zfile = "<stdin>";
109 	}else{
110 		fd = open(zfile, OREAD);
111 		if(fd < 0)
112 			sysfatal("can't open %s: %r", zfile);
113 		Binit(&bin, fd, OREAD);
114 	}
115 
116 	if(table){
117 		if(stream)
118 			ok = sunztable(&bin);
119 		else
120 			ok = unztable(&bin, zfile);
121 	}else{
122 		if(stream)
123 			ok = sunzip(&bin);
124 		else
125 			ok = unzip(&bin, zfile);
126 	}
127 
128 	exits(ok ? nil: "errors");
129 }
130 
131 /*
132  * print the table of contents from the "central directory structure"
133  */
134 static int
135 unztable(Biobuf *bin, char *file)
136 {
137 	ZipHead zh;
138 	int entries;
139 
140 	entries = findCDir(bin, file);
141 	if(entries < 0)
142 		return 0;
143 
144 	if(verbose > 1)
145 		print("%d items in the archive\n", entries);
146 	while(entries-- > 0){
147 		if(setjmp(zjmp)){
148 			free(zh.file);
149 			return 0;
150 		}
151 
152 		memset(&zh, 0, sizeof(zh));
153 		if(!cheader(bin, &zh))
154 			return 1;
155 
156 		if(wantFile(zh.file)){
157 			if(verbose)
158 				print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
159 			else
160 				print("%s\n", zh.file);
161 
162 			if(verbose > 1){
163 				print("\tmade by os %d vers %d.%d\n", zh.madeos, zh.madevers/10, zh.madevers % 10);
164 				print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers/10, zh.extvers % 10);
165 				print("\tflags %x\n", zh.flags);
166 				print("\tmethod %d\n", zh.meth);
167 				print("\tmod time %d\n", zh.modtime);
168 				print("\tmod date %d\n", zh.moddate);
169 				print("\tcrc %lux\n", zh.crc);
170 				print("\tcompressed size %lud\n", zh.csize);
171 				print("\tuncompressed size %lud\n", zh.uncsize);
172 				print("\tinternal attributes %ux\n", zh.iattr);
173 				print("\texternal attributes %lux\n", zh.eattr);
174 				print("\tstarts at %ld\n", zh.off);
175 			}
176 		}
177 
178 		free(zh.file);
179 		zh.file = nil;
180 	}
181 
182 	return 1;
183 }
184 
185 /*
186  * print the "local file header" table of contents
187  */
188 static int
189 sunztable(Biobuf *bin)
190 {
191 	ZipHead zh;
192 	vlong off;
193 	ulong hcrc, hcsize, huncsize;
194 	int ok, err;
195 
196 	ok = 1;
197 	for(;;){
198 		if(setjmp(zjmp)){
199 			free(zh.file);
200 			return 0;
201 		}
202 
203 		memset(&zh, 0, sizeof(zh));
204 		if(!header(bin, &zh))
205 			return ok;
206 
207 		hcrc = zh.crc;
208 		hcsize = zh.csize;
209 		huncsize = zh.uncsize;
210 
211 		wlen = 0;
212 		rlen = 0;
213 		crc = 0;
214 		wbad = 0;
215 
216 		if(zh.meth == 0){
217 			if(!copyout(-1, bin, zh.csize))
218 				error("reading data for %s failed: %r", zh.file);
219 		}else if(zh.meth == 8){
220 			off = Boffset(bin);
221 			err = inflate((void*)-1, crcwrite, bin, (int(*)(void*))Bgetc);
222 			if(err != FlateOk)
223 				error("inflate %s failed: %s", zh.file, flateerr(err));
224 			rlen = Boffset(bin) - off;
225 		}else
226 			error("can't handle compression method %d for %s", zh.meth, zh.file);
227 
228 		trailer(bin, &zh);
229 
230 		if(wantFile(zh.file)){
231 			if(verbose)
232 				print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
233 			else
234 				print("%s\n", zh.file);
235 
236 			if(verbose > 1){
237 				print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers / 10, zh.extvers % 10);
238 				print("\tflags %x\n", zh.flags);
239 				print("\tmethod %d\n", zh.meth);
240 				print("\tmod time %d\n", zh.modtime);
241 				print("\tmod date %d\n", zh.moddate);
242 				print("\tcrc %lux\n", zh.crc);
243 				print("\tcompressed size %lud\n", zh.csize);
244 				print("\tuncompressed size %lud\n", zh.uncsize);
245 				if((zh.flags & ZTrailInfo) && (hcrc || hcsize || huncsize)){
246 					print("\theader crc %lux\n", zh.crc);
247 					print("\theader compressed size %lud\n", zh.csize);
248 					print("\theader uncompressed size %lud\n", zh.uncsize);
249 				}
250 			}
251 		}
252 
253 		if(zh.crc != crc)
254 			error("crc mismatch for %s", zh.file);
255 		if(zh.uncsize != wlen)
256 			error("output size mismatch for %s", zh.file);
257 		if(zh.csize != rlen)
258 			error("input size mismatch for %s", zh.file);
259 
260 
261 		free(zh.file);
262 		zh.file = nil;
263 	}
264 
265 	return ok;
266 }
267 
268 /*
269  * extract files using the info in the central directory structure
270  */
271 static int
272 unzip(Biobuf *bin, char *file)
273 {
274 	ZipHead zh;
275 	vlong off;
276 	int ok, eok, entries;
277 
278 	entries = findCDir(bin, file);
279 	if(entries < 0)
280 		return 0;
281 
282 	ok = 1;
283 	while(entries-- > 0){
284 		if(setjmp(zjmp)){
285 			free(zh.file);
286 			return 0;
287 		}
288 		memset(&zh, 0, sizeof(zh));
289 		if(!cheader(bin, &zh))
290 			return ok;
291 
292 
293 		off = Boffset(bin);
294 		if(wantFile(zh.file)){
295 			if(Bseek(bin, zh.off, 0) < 0){
296 				fprint(2, "unzip: can't seek to start of %s, skipping\n", zh.file);
297 				ok = 0;
298 			}else{
299 				eok = unzipEntry(bin, &zh);
300 				if(eok <= 0){
301 					fprint(2, "unzip: skipping %s\n", zh.file);
302 					ok = 0;
303 				}
304 			}
305 		}
306 
307 		free(zh.file);
308 		zh.file = nil;
309 
310 		if(Bseek(bin, off, 0) < 0){
311 			fprint(2, "unzip: can't seek to start of next entry, terminating extraction\n");
312 			return 0;
313 		}
314 	}
315 
316 	return ok;
317 }
318 
319 /*
320  * extract files using the info the "local file headers"
321  */
322 static int
323 sunzip(Biobuf *bin)
324 {
325 	int eok;
326 
327 	for(;;){
328 		eok = unzipEntry(bin, nil);
329 		if(eok == 0)
330 			return 1;
331 		if(eok < 0)
332 			return 0;
333 	}
334 
335 	return 1;
336 }
337 
338 /*
339  * extracts a single entry from a zip file
340  * czh is the optional corresponding central directory entry
341  */
342 static int
343 unzipEntry(Biobuf *bin, ZipHead *czh)
344 {
345 	Dir *d;
346 	ZipHead zh;
347 	char *p;
348 	vlong off;
349 	int fd, isdir, ok, err;
350 
351 	zh.file = nil;
352 	if(setjmp(zjmp)){
353 		delfile = nil;
354 		free(zh.file);
355 		return -1;
356 	}
357 
358 	memset(&zh, 0, sizeof(zh));
359 	if(!header(bin, &zh))
360 		return 0;
361 
362 	ok = 1;
363 	isdir = 0;
364 
365 	fd = -1;
366 	if(wantFile(zh.file)){
367 		if(verbose)
368 			fprint(2, "extracting %s\n", zh.file);
369 
370 		if(czh != nil && czh->extos == ZDos){
371 			isdir = czh->eattr & ZDDir;
372 			if(isdir && zh.uncsize != 0)
373 				fprint(2, "unzip: ignoring directory data for %s\n", zh.file);
374 		}
375 		if(zh.meth == 0 && zh.uncsize == 0){
376 			p = strchr(zh.file, '\0');
377 			if(p > zh.file && p[-1] == '/')
378 				isdir = 1;
379 		}
380 
381 		if(stdout){
382 			if(ok && !isdir)
383 				fd = 1;
384 		}else if(isdir){
385 			fd = create(zh.file, OREAD, DMDIR | 0775);
386 			if(fd < 0){
387 				d = dirstat(zh.file);
388 				if(d == nil || (d->mode & DMDIR) != DMDIR){
389 					fprint(2, "unzip: can't create directory %s: %r\n", zh.file);
390 					ok = 0;
391 				}
392 				free(d);
393 			}
394 		}else if(ok){
395 			fd = create(zh.file, OWRITE, 0664);
396 			if(fd < 0){
397 				fprint(2, "unzip: can't create %s: %r\n", zh.file);
398 				ok = 0;
399 			}else
400 				delfile = zh.file;
401 		}
402 	}
403 
404 	wlen = 0;
405 	rlen = 0;
406 	crc = 0;
407 	wbad = 0;
408 
409 	if(zh.meth == 0){
410 		if(!copyout(fd, bin, zh.csize))
411 			error("copying data for %s failed: %r", zh.file);
412 	}else if(zh.meth == 8){
413 		off = Boffset(bin);
414 		err = inflate((void*)fd, crcwrite, bin, (int(*)(void*))Bgetc);
415 		if(err != FlateOk)
416 			error("inflate failed: %s", flateerr(err));
417 		rlen = Boffset(bin) - off;
418 	}else
419 		error("can't handle compression method %d for %s", zh.meth, zh.file);
420 
421 	trailer(bin, &zh);
422 
423 	if(zh.crc != crc)
424 		error("crc mismatch for %s", zh.file);
425 	if(zh.uncsize != wlen)
426 		error("output size mismatch for %s", zh.file);
427 	if(zh.csize != rlen)
428 		error("input size mismatch for %s", zh.file);
429 
430 	delfile = nil;
431 	free(zh.file);
432 
433 	if(fd >= 0 && !stdout){
434 		if(settimes){
435 			d = dirfstat(fd);
436 			if(d != nil){
437 				d->mtime = msdos2time(zh.modtime, zh.moddate);
438 				if(d->mtime)
439 					dirfwstat(fd, d);
440 			}
441 		}
442 		close(fd);
443 	}
444 
445 	return ok;
446 }
447 
448 static int
449 wantFile(char *file)
450 {
451 	int i, n;
452 
453 	if(nwant == 0)
454 		return 1;
455 	for(i = 0; i < nwant; i++){
456 		if(strcmp(want[i], file) == 0)
457 			return 1;
458 		n = strlen(want[i]);
459 		if(strncmp(want[i], file, n) == 0 && file[n] == '/')
460 			return 1;
461 	}
462 	return 0;
463 }
464 
465 /*
466  * find the start of the central directory
467  * returns the number of entries in the directory,
468  * or -1 if there was an error
469  */
470 static int
471 findCDir(Biobuf *bin, char *file)
472 {
473 	vlong ecoff;
474 	long off, size;
475 	int entries, zclen, dn, ds, de;
476 
477 	ecoff = Bseek(bin, -ZECHeadSize, 2);
478 	if(ecoff < 0){
479 		fprint(2, "unzip: can't seek to contents of %s; try adding -s\n", file);
480 		return -1;
481 	}
482 	if(setjmp(zjmp))
483 		return -1;
484 
485 	if(get4(bin) != ZECHeader){
486 		fprint(2, "unzip: bad magic number for contents of %s\n", file);
487 		return -1;
488 	}
489 	dn = get2(bin);
490 	ds = get2(bin);
491 	de = get2(bin);
492 	entries = get2(bin);
493 	size = get4(bin);
494 	off = get4(bin);
495 	zclen = get2(bin);
496 	while(zclen-- > 0)
497 		get1(bin);
498 
499 	if(verbose > 1){
500 		print("table starts at %ld for %ld bytes\n", off, size);
501 		if(ecoff - size != off)
502 			print("\ttable should start at %lld-%ld=%lld\n", ecoff, size, ecoff-size);
503 		if(dn || ds || de != entries)
504 			print("\tcurrent disk=%d start disk=%d table entries on this disk=%d\n", dn, ds, de);
505 	}
506 
507 	if(Bseek(bin, off, 0) != off){
508 		fprint(2, "unzip: can't seek to start of contents of %s\n", file);
509 		return -1;
510 	}
511 
512 	return entries;
513 }
514 
515 static int
516 cheader(Biobuf *bin, ZipHead *zh)
517 {
518 	ulong v;
519 	int flen, xlen, fclen;
520 
521 	v = get4(bin);
522 	if(v != ZCHeader){
523 		if(v == ZECHeader)
524 			return 0;
525 		error("bad magic number %lux", v);
526 	}
527 	zh->madevers = get1(bin);
528 	zh->madeos = get1(bin);
529 	zh->extvers = get1(bin);
530 	zh->extos = get1(bin);
531 	zh->flags = get2(bin);
532 	zh->meth = get2(bin);
533 	zh->modtime = get2(bin);
534 	zh->moddate = get2(bin);
535 	zh->crc = get4(bin);
536 	zh->csize = get4(bin);
537 	zh->uncsize = get4(bin);
538 	flen = get2(bin);
539 	xlen = get2(bin);
540 	fclen = get2(bin);
541 	get2(bin);		/* disk number start */
542 	zh->iattr = get2(bin);
543 	zh->eattr = get4(bin);
544 	zh->off = get4(bin);
545 
546 	zh->file = getname(bin, flen);
547 
548 	while(xlen-- > 0)
549 		get1(bin);
550 
551 	while(fclen-- > 0)
552 		get1(bin);
553 
554 	return 1;
555 }
556 
557 static int
558 header(Biobuf *bin, ZipHead *zh)
559 {
560 	ulong v;
561 	int flen, xlen;
562 
563 	v = get4(bin);
564 	if(v != ZHeader){
565 		if(v == ZCHeader)
566 			return 0;
567 		error("bad magic number %lux at %lld", v, Boffset(bin)-4);
568 	}
569 	zh->extvers = get1(bin);
570 	zh->extos = get1(bin);
571 	zh->flags = get2(bin);
572 	zh->meth = get2(bin);
573 	zh->modtime = get2(bin);
574 	zh->moddate = get2(bin);
575 	zh->crc = get4(bin);
576 	zh->csize = get4(bin);
577 	zh->uncsize = get4(bin);
578 	flen = get2(bin);
579 	xlen = get2(bin);
580 
581 	zh->file = getname(bin, flen);
582 
583 	while(xlen-- > 0)
584 		get1(bin);
585 
586 	return 1;
587 }
588 
589 static void
590 trailer(Biobuf *bin, ZipHead *zh)
591 {
592 	if(zh->flags & ZTrailInfo){
593 		zh->crc = get4(bin);
594 		zh->csize = get4(bin);
595 		zh->uncsize = get4(bin);
596 	}
597 }
598 
599 static char*
600 getname(Biobuf *bin, int len)
601 {
602 	char *s;
603 	int i, c;
604 
605 	s = emalloc(len + 1);
606 	for(i = 0; i < len; i++){
607 		c = get1(bin);
608 		if(lower)
609 			c = tolower(c);
610 		s[i] = c;
611 	}
612 	s[i] = '\0';
613 	return s;
614 }
615 
616 static int
617 crcwrite(void *out, void *buf, int n)
618 {
619 	int fd, nw;
620 
621 	wlen += n;
622 	crc = blockcrc(crctab, crc, buf, n);
623 	fd = (int)out;
624 	if(fd < 0)
625 		return n;
626 	nw = write(fd, buf, n);
627 	if(nw != n)
628 		wbad = 1;
629 	return nw;
630 }
631 
632 static int
633 copyout(int ofd, Biobuf *bin, long len)
634 {
635 	char buf[BufSize];
636 	int n;
637 
638 	for(; len > 0; len -= n){
639 		n = len;
640 		if(n > BufSize)
641 			n = BufSize;
642 		n = Bread(bin, buf, n);
643 		if(n <= 0)
644 			return 0;
645 		rlen += n;
646 		if(crcwrite((void*)ofd, buf, n) != n)
647 			return 0;
648 	}
649 	return 1;
650 }
651 
652 static ulong
653 get4(Biobuf *b)
654 {
655 	ulong v;
656 	int i, c;
657 
658 	v = 0;
659 	for(i = 0; i < 4; i++){
660 		c = Bgetc(b);
661 		if(c < 0)
662 			error("unexpected eof reading file information");
663 		v |= c << (i * 8);
664 	}
665 	return v;
666 }
667 
668 static int
669 get2(Biobuf *b)
670 {
671 	int i, c, v;
672 
673 	v = 0;
674 	for(i = 0; i < 2; i++){
675 		c = Bgetc(b);
676 		if(c < 0)
677 			error("unexpected eof reading file information");
678 		v |= c << (i * 8);
679 	}
680 	return v;
681 }
682 
683 static int
684 get1(Biobuf *b)
685 {
686 	int c;
687 
688 	c = Bgetc(b);
689 	if(c < 0)
690 		error("unexpected eof reading file information");
691 	return c;
692 }
693 
694 static long
695 msdos2time(int time, int date)
696 {
697 	Tm tm;
698 
699 	tm.hour = time >> 11;
700 	tm.min = (time >> 5) & 63;
701 	tm.sec = (time & 31) << 1;
702 	tm.year = 80 + (date >> 9);
703 	tm.mon = ((date >> 5) & 15) - 1;
704 	tm.mday = date & 31;
705 	tm.zone[0] = '\0';
706 	tm.yday = 0;
707 
708 	return tm2sec(&tm);
709 }
710 
711 static void*
712 emalloc(ulong n)
713 {
714 	void *p;
715 
716 	p = malloc(n);
717 	if(p == nil)
718 		sysfatal("out of memory");
719 	return p;
720 }
721 
722 static void
723 error(char *fmt, ...)
724 {
725 	va_list arg;
726 
727 	fprint(2, "unzip: ");
728 	va_start(arg, fmt);
729 	vfprint(2, fmt, arg);
730 	va_end(arg);
731 	fprint(2, "\n");
732 
733 	if(delfile != nil){
734 		fprint(2, "unzip: removing output file %s\n", delfile);
735 		remove(delfile);
736 		delfile = nil;
737 	}
738 
739 	longjmp(zjmp, 1);
740 }
741