xref: /plan9-contrib/sys/src/cmd/gzip/unzip.c (revision a6a9e07217f318acf170f99684a55fba5200524f)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <flate.h>
5 #include "zip.h"
6 
enum
{
	BufSize	= 4096		/* size of the buffer copyout uses to move stored data */
};
11 
/* archive reading, listing and extraction routines defined below */
static	int	cheader(Biobuf *bin, ZipHead *zh);
static	int	copyout(int ofd, Biobuf *bin, long len);
static	int	crcwrite(void *ofd, void *buf, int n);
static	int	findCDir(Biobuf *bin, char *file);
static	int	get1(Biobuf *b);
static	int	get2(Biobuf *b);
static	ulong	get4(Biobuf *b);
static	char	*getname(Biobuf *b, int len);
static	int	header(Biobuf *bin, ZipHead *zh);
static	long	msdos2time(int time, int date);
static	int	sunzip(Biobuf *bin);
static	int	sunztable(Biobuf *bin);
static	void	trailer(Biobuf *bin, ZipHead *zh);
static	int	unzip(Biobuf *bin, char *file);
static	int	unzipEntry(Biobuf *bin, ZipHead *czh);
static	int	unztable(Biobuf *bin, char *file);
static	int	wantFile(char *file);

/* allocation and error reporting helpers */
static	void	*emalloc(ulong);
static	void	error(char*, ...);
#pragma	varargck	argpos	error	1
33 
static	Biobuf	bin;		/* archive input; set up in main on fd 0 or the -f file */
static	ulong	crc;		/* running crc of the data passed to crcwrite for the current entry */
static	ulong	*crctab;	/* table from mkcrctab(ZCrcPoly), used by blockcrc in crcwrite */
static	int	debug;		/* count of -D flags */
static	char	*delfile;	/* output file being written; error() removes it if non-nil */
static	int	lower;		/* -i: getname converts names with tolower */
static	int	nwant;		/* number of names left on the command line */
static	ulong	rlen;		/* bytes of archive data consumed for the current entry */
static	int	settimes;	/* -T: set the modification time of extracted files */
static	int	stdout;		/* -c: write extracted data to fd 1 */
static	int	verbose;	/* count of -v flags */
static	char	**want;		/* names left on the command line; see wantFile */
static	int	wbad;		/* set by crcwrite when write returns a short count */
static	ulong	wlen;		/* bytes passed to crcwrite for the current entry */
static	jmp_buf	zjmp;		/* where error() longjmps to */
49 
/*
 * print the command synopsis and exit with status "usage".
 * the option list matches the cases main accepts in ARGBEGIN.
 */
static void
usage(void)
{
	fprint(2, "usage: unzip [-cistTvD] [-f zipfile] [file ...]\n");
	exits("usage");
}
56 
57 void
58 main(int argc, char *argv[])
59 {
60 	char *zfile;
61 	int fd, ok, table, stream;
62 
63 	table = 0;
64 	stream = 0;
65 	zfile = nil;
66 	ARGBEGIN{
67 	case 'D':
68 		debug++;
69 		break;
70 	case 'c':
71 		stdout++;
72 		break;
73 	case 'i':
74 		lower++;
75 		break;
76 	case 'f':
77 		zfile = ARGF();
78 		if(zfile == nil)
79 			usage();
80 		break;
81 	case 's':
82 		stream++;
83 		break;
84 	case 't':
85 		table++;
86 		break;
87 	case 'T':
88 		settimes++;
89 		break;
90 	case 'v':
91 		verbose++;
92 		break;
93 	default:
94 		usage();
95 		break;
96 	}ARGEND
97 
98 	nwant = argc;
99 	want = argv;
100 
101 	crctab = mkcrctab(ZCrcPoly);
102 	ok = inflateinit();
103 	if(ok != FlateOk)
104 		sysfatal("inflateinit failed: %s\n", flateerr(ok));
105 
106 	if(zfile == nil){
107 		Binit(&bin, 0, OREAD);
108 		zfile = "<stdin>";
109 	}else{
110 		fd = open(zfile, OREAD);
111 		if(fd < 0)
112 			sysfatal("can't open %s: %r", zfile);
113 		Binit(&bin, fd, OREAD);
114 	}
115 
116 	if(table){
117 		if(stream)
118 			ok = sunztable(&bin);
119 		else
120 			ok = unztable(&bin, zfile);
121 	}else{
122 		if(stream)
123 			ok = sunzip(&bin);
124 		else
125 			ok = unzip(&bin, zfile);
126 	}
127 
128 	exits(ok ? nil: "errors");
129 }
130 
131 /*
132  * print the table of contents from the "central directory structure"
133  */
134 static int
135 unztable(Biobuf *bin, char *file)
136 {
137 	ZipHead zh;
138 	int entries;
139 
140 	entries = findCDir(bin, file);
141 	if(entries < 0)
142 		return 0;
143 
144 	if(verbose > 1)
145 		print("%d items in the archive\n", entries);
146 	while(entries-- > 0){
147 		if(setjmp(zjmp)){
148 			free(zh.file);
149 			return 0;
150 		}
151 
152 		memset(&zh, 0, sizeof(zh));
153 		if(!cheader(bin, &zh))
154 			return 1;
155 
156 		if(wantFile(zh.file)){
157 			if(verbose)
158 				print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
159 			else
160 				print("%s\n", zh.file);
161 
162 			if(verbose > 1){
163 				print("\tmade by os %d vers %d.%d\n", zh.madeos, zh.madevers/10, zh.madevers % 10);
164 				print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers/10, zh.extvers % 10);
165 				print("\tflags %x\n", zh.flags);
166 				print("\tmethod %d\n", zh.meth);
167 				print("\tmod time %d\n", zh.modtime);
168 				print("\tmod date %d\n", zh.moddate);
169 				print("\tcrc %lux\n", zh.crc);
170 				print("\tcompressed size %lud\n", zh.csize);
171 				print("\tuncompressed size %lud\n", zh.uncsize);
172 				print("\tinternal attributes %ux\n", zh.iattr);
173 				print("\texternal attributes %lux\n", zh.eattr);
174 				print("\tstarts at %ld\n", zh.off);
175 			}
176 		}
177 
178 		free(zh.file);
179 		zh.file = nil;
180 	}
181 
182 	return 1;
183 }
184 
185 /*
186  * print the "local file header" table of contents
187  */
188 static int
189 sunztable(Biobuf *bin)
190 {
191 	ZipHead zh;
192 	vlong off;
193 	ulong hcrc, hcsize, huncsize;
194 	int ok, err;
195 
196 	ok = 1;
197 	for(;;){
198 		if(setjmp(zjmp)){
199 			free(zh.file);
200 			return 0;
201 		}
202 
203 		memset(&zh, 0, sizeof(zh));
204 		if(!header(bin, &zh))
205 			return ok;
206 
207 		hcrc = zh.crc;
208 		hcsize = zh.csize;
209 		huncsize = zh.uncsize;
210 
211 		wlen = 0;
212 		rlen = 0;
213 		crc = 0;
214 		wbad = 0;
215 
216 		if(zh.meth == 0){
217 			if(!copyout(-1, bin, zh.csize))
218 				error("reading data for %s failed: %r", zh.file);
219 		}else if(zh.meth == 8){
220 			off = Boffset(bin);
221 			err = inflate((void*)-1, crcwrite, bin, (int(*)(void*))Bgetc);
222 			if(err != FlateOk)
223 				error("inflate %s failed: %s", zh.file, flateerr(err));
224 			rlen = Boffset(bin) - off;
225 		}else
226 			error("can't handle compression method %d for %s", zh.meth, zh.file);
227 
228 		trailer(bin, &zh);
229 
230 		if(wantFile(zh.file)){
231 			if(verbose)
232 				print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
233 			else
234 				print("%s\n", zh.file);
235 
236 			if(verbose > 1){
237 				print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers / 10, zh.extvers % 10);
238 				print("\tflags %x\n", zh.flags);
239 				print("\tmethod %d\n", zh.meth);
240 				print("\tmod time %d\n", zh.modtime);
241 				print("\tmod date %d\n", zh.moddate);
242 				print("\tcrc %lux\n", zh.crc);
243 				print("\tcompressed size %lud\n", zh.csize);
244 				print("\tuncompressed size %lud\n", zh.uncsize);
245 				if((zh.flags & ZTrailInfo) && (hcrc || hcsize || huncsize)){
246 					print("\theader crc %lux\n", zh.crc);
247 					print("\theader compressed size %lud\n", zh.csize);
248 					print("\theader uncompressed size %lud\n", zh.uncsize);
249 				}
250 			}
251 		}
252 
253 		if(zh.crc != crc)
254 			error("crc mismatch for %s", zh.file);
255 		if(zh.uncsize != wlen)
256 			error("output size mismatch for %s", zh.file);
257 		if(zh.csize != rlen)
258 			error("input size mismatch for %s", zh.file);
259 
260 
261 		free(zh.file);
262 		zh.file = nil;
263 	}
264 
265 	return ok;
266 }
267 
268 /*
269  * extract files using the info in the central directory structure
270  */
271 static int
272 unzip(Biobuf *bin, char *file)
273 {
274 	ZipHead zh;
275 	vlong off;
276 	int ok, eok, entries;
277 
278 	entries = findCDir(bin, file);
279 	if(entries < 0)
280 		return 0;
281 
282 	ok = 1;
283 	while(entries-- > 0){
284 		if(setjmp(zjmp)){
285 			free(zh.file);
286 			return 0;
287 		}
288 		memset(&zh, 0, sizeof(zh));
289 		if(!cheader(bin, &zh))
290 			return ok;
291 
292 
293 		off = Boffset(bin);
294 		if(wantFile(zh.file)){
295 			if(Bseek(bin, zh.off, 0) < 0){
296 				fprint(2, "unzip: can't seek to start of %s, skipping\n", zh.file);
297 				ok = 0;
298 			}else{
299 				eok = unzipEntry(bin, &zh);
300 				if(eok <= 0){
301 					fprint(2, "unzip: skipping %s\n", zh.file);
302 					ok = 0;
303 				}
304 			}
305 		}
306 
307 		free(zh.file);
308 		zh.file = nil;
309 
310 		if(Bseek(bin, off, 0) < 0){
311 			fprint(2, "unzip: can't seek to start of next entry, terminating extraction\n");
312 			return 0;
313 		}
314 	}
315 
316 	return ok;
317 }
318 
319 /*
320  * extract files using the info the "local file headers"
321  */
322 static int
323 sunzip(Biobuf *bin)
324 {
325 	int eok;
326 
327 	for(;;){
328 		eok = unzipEntry(bin, nil);
329 		if(eok == 0)
330 			return 1;
331 		if(eok < 0)
332 			return 0;
333 	}
334 
335 	return 1;
336 }
337 
338 /*
339  * extracts a single entry from a zip file
340  * czh is the optional corresponding central directory entry
341  */
342 static int
343 unzipEntry(Biobuf *bin, ZipHead *czh)
344 {
345 	Dir *d;
346 	ZipHead zh;
347 	char *p;
348 	vlong off;
349 	int fd, isdir, ok, err;
350 
351 	zh.file = nil;
352 	if(setjmp(zjmp)){
353 		delfile = nil;
354 		free(zh.file);
355 		return -1;
356 	}
357 
358 	memset(&zh, 0, sizeof(zh));
359 	if(!header(bin, &zh))
360 		return 0;
361 
362 	ok = 1;
363 	isdir = 0;
364 
365 	fd = -1;
366 	if(wantFile(zh.file)){
367 		if(verbose)
368 			fprint(2, "extracting %s\n", zh.file);
369 
370 		if(czh != nil && czh->extos == ZDos){
371 			isdir = czh->eattr & ZDDir;
372 			if(isdir && zh.uncsize != 0)
373 				fprint(2, "unzip: ignoring directory data for %s\n", zh.file);
374 		}else if(zh.meth == 0 && zh.uncsize == 0){
375 			p = strchr(zh.file, '\0');
376 			if(p > zh.file && p[-1] == '/')
377 				isdir = 1;
378 		}
379 
380 		if(stdout){
381 			if(ok && !isdir)
382 				fd = 1;
383 		}else if(isdir){
384 			fd = create(zh.file, OREAD, DMDIR | 0775);
385 			if(fd < 0){
386 				d = dirstat(zh.file);
387 				if(d == nil || (d->mode & DMDIR) != DMDIR){
388 					fprint(2, "unzip: can't create directory %s: %r\n", zh.file);
389 					ok = 0;
390 				}
391 				free(d);
392 			}
393 		}else if(ok){
394 			fd = create(zh.file, OWRITE, 0664);
395 			if(fd < 0){
396 				fprint(2, "unzip: can't create %s: %r\n", zh.file);
397 				ok = 0;
398 			}else
399 				delfile = zh.file;
400 		}
401 	}
402 
403 	wlen = 0;
404 	rlen = 0;
405 	crc = 0;
406 	wbad = 0;
407 
408 	if(zh.meth == 0){
409 		if(!copyout(fd, bin, zh.csize))
410 			error("copying data for %s failed: %r", zh.file);
411 	}else if(zh.meth == 8){
412 		off = Boffset(bin);
413 		err = inflate((void*)fd, crcwrite, bin, (int(*)(void*))Bgetc);
414 		if(err != FlateOk)
415 			error("inflate failed: %s", flateerr(err));
416 		rlen = Boffset(bin) - off;
417 	}else
418 		error("can't handle compression method %d for %s", zh.meth, zh.file);
419 
420 	trailer(bin, &zh);
421 
422 	if(zh.crc != crc)
423 		error("crc mismatch for %s", zh.file);
424 	if(zh.uncsize != wlen)
425 		error("output size mismatch for %s", zh.file);
426 	if(zh.csize != rlen)
427 		error("input size mismatch for %s", zh.file);
428 
429 	delfile = nil;
430 	free(zh.file);
431 
432 	if(fd >= 0 && !stdout){
433 		if(settimes){
434 			d = dirfstat(fd);
435 			if(d != nil){
436 				d->mtime = msdos2time(zh.modtime, zh.moddate);
437 				if(d->mtime)
438 					dirfwstat(fd, d);
439 			}
440 		}
441 		close(fd);
442 	}
443 
444 	return ok;
445 }
446 
447 static int
448 wantFile(char *file)
449 {
450 	int i, n;
451 
452 	if(nwant == 0)
453 		return 1;
454 	for(i = 0; i < nwant; i++){
455 		if(strcmp(want[i], file) == 0)
456 			return 1;
457 		n = strlen(want[i]);
458 		if(strncmp(want[i], file, n) == 0 && file[n] == '/')
459 			return 1;
460 	}
461 	return 0;
462 }
463 
464 /*
465  * find the start of the central directory
466  * returns the number of entries in the directory,
467  * or -1 if there was an error
468  */
469 static int
470 findCDir(Biobuf *bin, char *file)
471 {
472 	vlong ecoff;
473 	long off, size;
474 	int entries, zclen, dn, ds, de;
475 
476 	ecoff = Bseek(bin, -ZECHeadSize, 2);
477 	if(ecoff < 0){
478 		fprint(2, "unzip: can't seek to contents of %s; try adding -s\n", file);
479 		return -1;
480 	}
481 	if(setjmp(zjmp))
482 		return -1;
483 
484 	if(get4(bin) != ZECHeader){
485 		fprint(2, "unzip: bad magic number for contents of %s\n", file);
486 		return -1;
487 	}
488 	dn = get2(bin);
489 	ds = get2(bin);
490 	de = get2(bin);
491 	entries = get2(bin);
492 	size = get4(bin);
493 	off = get4(bin);
494 	zclen = get2(bin);
495 	while(zclen-- > 0)
496 		get1(bin);
497 
498 	if(verbose > 1){
499 		print("table starts at %ld for %ld bytes\n", off, size);
500 		if(ecoff - size != off)
501 			print("\ttable should start at %lld-%ld=%lld\n", ecoff, size, ecoff-size);
502 		if(dn || ds || de != entries)
503 			print("\tcurrent disk=%d start disk=%d table entries on this disk=%d\n", dn, ds, de);
504 	}
505 
506 	if(Bseek(bin, off, 0) != off){
507 		fprint(2, "unzip: can't seek to start of contents of %s\n", file);
508 		return -1;
509 	}
510 
511 	return entries;
512 }
513 
514 static int
515 cheader(Biobuf *bin, ZipHead *zh)
516 {
517 	ulong v;
518 	int flen, xlen, fclen;
519 
520 	v = get4(bin);
521 	if(v != ZCHeader){
522 		if(v == ZECHeader)
523 			return 0;
524 		error("bad magic number %lux", v);
525 	}
526 	zh->madevers = get1(bin);
527 	zh->madeos = get1(bin);
528 	zh->extvers = get1(bin);
529 	zh->extos = get1(bin);
530 	zh->flags = get2(bin);
531 	zh->meth = get2(bin);
532 	zh->modtime = get2(bin);
533 	zh->moddate = get2(bin);
534 	zh->crc = get4(bin);
535 	zh->csize = get4(bin);
536 	zh->uncsize = get4(bin);
537 	flen = get2(bin);
538 	xlen = get2(bin);
539 	fclen = get2(bin);
540 	get2(bin);		/* disk number start */
541 	zh->iattr = get2(bin);
542 	zh->eattr = get4(bin);
543 	zh->off = get4(bin);
544 
545 	zh->file = getname(bin, flen);
546 
547 	while(xlen-- > 0)
548 		get1(bin);
549 
550 	while(fclen-- > 0)
551 		get1(bin);
552 
553 	return 1;
554 }
555 
556 static int
557 header(Biobuf *bin, ZipHead *zh)
558 {
559 	ulong v;
560 	int flen, xlen;
561 
562 	v = get4(bin);
563 	if(v != ZHeader){
564 		if(v == ZCHeader)
565 			return 0;
566 		error("bad magic number %lux at %lld", v, Boffset(bin)-4);
567 	}
568 	zh->extvers = get1(bin);
569 	zh->extos = get1(bin);
570 	zh->flags = get2(bin);
571 	zh->meth = get2(bin);
572 	zh->modtime = get2(bin);
573 	zh->moddate = get2(bin);
574 	zh->crc = get4(bin);
575 	zh->csize = get4(bin);
576 	zh->uncsize = get4(bin);
577 	flen = get2(bin);
578 	xlen = get2(bin);
579 
580 	zh->file = getname(bin, flen);
581 
582 	while(xlen-- > 0)
583 		get1(bin);
584 
585 	return 1;
586 }
587 
588 static void
589 trailer(Biobuf *bin, ZipHead *zh)
590 {
591 	if(zh->flags & ZTrailInfo){
592 		zh->crc = get4(bin);
593 		zh->csize = get4(bin);
594 		zh->uncsize = get4(bin);
595 	}
596 }
597 
598 static char*
599 getname(Biobuf *bin, int len)
600 {
601 	char *s;
602 	int i, c;
603 
604 	s = emalloc(len + 1);
605 	for(i = 0; i < len; i++){
606 		c = get1(bin);
607 		if(lower)
608 			c = tolower(c);
609 		s[i] = c;
610 	}
611 	s[i] = '\0';
612 	return s;
613 }
614 
615 static int
616 crcwrite(void *out, void *buf, int n)
617 {
618 	int fd, nw;
619 
620 	wlen += n;
621 	crc = blockcrc(crctab, crc, buf, n);
622 	fd = (int)out;
623 	if(fd < 0)
624 		return n;
625 	nw = write(fd, buf, n);
626 	if(nw != n)
627 		wbad = 1;
628 	return nw;
629 }
630 
631 static int
632 copyout(int ofd, Biobuf *bin, long len)
633 {
634 	char buf[BufSize];
635 	int n;
636 
637 	for(; len > 0; len -= n){
638 		n = len;
639 		if(n > BufSize)
640 			n = BufSize;
641 		n = Bread(bin, buf, n);
642 		if(n <= 0)
643 			return 0;
644 		rlen += n;
645 		if(crcwrite((void*)ofd, buf, n) != n)
646 			return 0;
647 	}
648 	return 1;
649 }
650 
651 static ulong
652 get4(Biobuf *b)
653 {
654 	ulong v;
655 	int i, c;
656 
657 	v = 0;
658 	for(i = 0; i < 4; i++){
659 		c = Bgetc(b);
660 		if(c < 0)
661 			error("unexpected eof reading file information");
662 		v |= c << (i * 8);
663 	}
664 	return v;
665 }
666 
667 static int
668 get2(Biobuf *b)
669 {
670 	int i, c, v;
671 
672 	v = 0;
673 	for(i = 0; i < 2; i++){
674 		c = Bgetc(b);
675 		if(c < 0)
676 			error("unexpected eof reading file information");
677 		v |= c << (i * 8);
678 	}
679 	return v;
680 }
681 
682 static int
683 get1(Biobuf *b)
684 {
685 	int c;
686 
687 	c = Bgetc(b);
688 	if(c < 0)
689 		error("unexpected eof reading file information");
690 	return c;
691 }
692 
693 static long
694 msdos2time(int time, int date)
695 {
696 	Tm tm;
697 
698 	tm.hour = time >> 11;
699 	tm.min = (time >> 5) & 63;
700 	tm.sec = (time & 31) << 1;
701 	tm.year = 80 + (date >> 9);
702 	tm.mon = ((date >> 5) & 15) - 1;
703 	tm.mday = date & 31;
704 	tm.zone[0] = '\0';
705 	tm.yday = 0;
706 
707 	return tm2sec(&tm);
708 }
709 
710 static void*
711 emalloc(ulong n)
712 {
713 	void *p;
714 
715 	p = malloc(n);
716 	if(p == nil)
717 		sysfatal("out of memory");
718 	return p;
719 }
720 
721 static void
722 error(char *fmt, ...)
723 {
724 	va_list arg;
725 
726 	fprint(2, "unzip: ");
727 	va_start(arg, fmt);
728 	vfprint(2, fmt, arg);
729 	va_end(arg);
730 	fprint(2, "\n");
731 
732 	if(delfile != nil){
733 		fprint(2, "unzip: removing output file %s\n", delfile);
734 		remove(delfile);
735 		delfile = nil;
736 	}
737 
738 	longjmp(zjmp, 1);
739 }
740