xref: /plan9-contrib/sys/src/cmd/gzip/unzip.c (revision 7dd7cddf99dd7472612f1413b4da293630e6b1bc)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "zip.h"
5 #include "deflate.h"
6 
/*
 * size in bytes of the buffer copyout uses to move stored data
 */
enum
{
	BufSize	= 4*1024
};
11 
12 static	int	cheader(Biobuf *bin, ZipHead *zh);
13 static	int	copyout(int ofd, Biobuf *bin, long len);
14 static	int	crcwrite(void *ofd, void *buf, int n);
15 static	int	findCDir(Biobuf *bin, char *file);
16 static	int	get1(Biobuf *b);
17 static	int	get2(Biobuf *b);
18 static	ulong	get4(Biobuf *b);
19 static	char	*getname(Biobuf *b, int len);
20 static	int	header(Biobuf *bin, ZipHead *zh);
21 static	long	msdos2time(int time, int date);
22 static	int	sunzip(Biobuf *bin);
23 static	int	sunztable(Biobuf *bin);
24 static	void	trailer(Biobuf *bin, ZipHead *zh);
25 static	int	unzip(Biobuf *bin, char *file);
26 static	int	unzipEntry(Biobuf *bin, ZipHead *czh);
27 static	int	unztable(Biobuf *bin, char *file);
28 static	int	wantFile(char *file);
29 
30 static	void	*emalloc(ulong);
31 static	void	error(char*, ...);
32 #pragma	varargck	argpos	error	1
33 
34 static	Biobuf	bin;
35 static	ulong	crc;
36 static	int	debug;
37 static	char	*delfile;
38 static	int	lower;
39 static	int	nwant;
40 static	ulong	rlen;
41 static	int	settimes;
42 static	int	stdout;
43 static	int	verbose;
44 static	char	**want;
45 static	int	wbad;
46 static	ulong	wlen;
47 static	jmp_buf	zjmp;
48 
/*
 * print a usage summary on standard error and exit with status "usage".
 * the option letters shown are the ones main accepts:
 * -c, -i, -s, -t, -T, -v, -D and -f zipfile.
 */
static void
usage(void)
{
	fprint(2, "usage: unzip [-cistTvD] [-f zipfile] [file ...]\n");
	exits("usage");
}
55 
56 void
57 main(int argc, char *argv[])
58 {
59 	char *zfile;
60 	int fd, ok, table, stream;
61 
62 	table = 0;
63 	stream = 0;
64 	zfile = nil;
65 	ARGBEGIN{
66 	case 'D':
67 		debug++;
68 		break;
69 	case 'c':
70 		stdout++;
71 		break;
72 	case 'i':
73 		lower++;
74 		break;
75 	case 'f':
76 		zfile = ARGF();
77 		if(zfile == nil)
78 			usage();
79 		break;
80 	case 's':
81 		stream++;
82 		break;
83 	case 't':
84 		table++;
85 		break;
86 	case 'T':
87 		settimes++;
88 		break;
89 	case 'v':
90 		verbose++;
91 		break;
92 	default:
93 		usage();
94 		break;
95 	}ARGEND
96 
97 	nwant = argc;
98 	want = argv;
99 
100 	mkcrctab(ZCrcPoly);
101 	inflateinit();
102 
103 	if(zfile == nil){
104 		Binit(&bin, 0, OREAD);
105 		zfile = "<stdin>";
106 	}else{
107 		fd = open(zfile, OREAD);
108 		if(fd < 0)
109 			sysfatal("can't open %s: %r", zfile);
110 		Binit(&bin, fd, OREAD);
111 	}
112 
113 	if(table){
114 		if(stream)
115 			ok = sunztable(&bin);
116 		else
117 			ok = unztable(&bin, zfile);
118 	}else{
119 		if(stream)
120 			ok = sunzip(&bin);
121 		else
122 			ok = unzip(&bin, zfile);
123 	}
124 
125 	exits(ok ? nil: "errors");
126 }
127 
128 /*
129  * print the table of contents from the "central directory structure"
130  */
131 static int
132 unztable(Biobuf *bin, char *file)
133 {
134 	ZipHead zh;
135 	int entries;
136 
137 	entries = findCDir(bin, file);
138 	if(entries < 0)
139 		return 0;
140 
141 	if(verbose > 1)
142 		print("%d items in the archive\n", entries);
143 	while(entries-- > 0){
144 		if(setjmp(zjmp)){
145 			free(zh.file);
146 			return 0;
147 		}
148 
149 		memset(&zh, 0, sizeof(zh));
150 		if(!cheader(bin, &zh))
151 			return 1;
152 
153 		if(wantFile(zh.file)){
154 			if(verbose)
155 				print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
156 			else
157 				print("%s\n", zh.file);
158 
159 			if(verbose > 1){
160 				print("\tmade by os %d vers %d.%d\n", zh.madeos, zh.madevers/10, zh.madevers % 10);
161 				print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers/10, zh.extvers % 10);
162 				print("\tflags %x\n", zh.flags);
163 				print("\tmethod %d\n", zh.meth);
164 				print("\tmod time %d\n", zh.modtime);
165 				print("\tmod date %d\n", zh.moddate);
166 				print("\tcrc %lux\n", zh.crc);
167 				print("\tcompressed size %lud\n", zh.csize);
168 				print("\tuncompressed size %lud\n", zh.uncsize);
169 				print("\tinternal attributes %ux\n", zh.iattr);
170 				print("\texternal attributes %lux\n", zh.eattr);
171 				print("\tstarts at %ld\n", zh.off);
172 			}
173 		}
174 
175 		free(zh.file);
176 		zh.file = nil;
177 	}
178 
179 	return 1;
180 }
181 
182 /*
183  * print the "local file header" table of contents
184  */
185 static int
186 sunztable(Biobuf *bin)
187 {
188 	ZipHead zh;
189 	vlong off;
190 	ulong hcrc, hcsize, huncsize;
191 	int ok;
192 
193 	ok = 1;
194 	for(;;){
195 		if(setjmp(zjmp)){
196 			free(zh.file);
197 			return 0;
198 		}
199 
200 		memset(&zh, 0, sizeof(zh));
201 		if(!header(bin, &zh))
202 			return ok;
203 
204 		hcrc = zh.crc;
205 		hcsize = zh.csize;
206 		huncsize = zh.uncsize;
207 
208 		wlen = 0;
209 		rlen = 0;
210 		crc = 0;
211 		wbad = 0;
212 
213 		if(zh.meth == 0){
214 			if(!copyout(-1, bin, zh.csize))
215 				error("reading data for %s failed: %r", zh.file);
216 		}else if(zh.meth == 8){
217 			off = Boffset(bin);
218 			if(!inflate((void*)-1, crcwrite, bin, (int(*)(void*))Bgetc))
219 				error("inflate %s failed: %r", zh.file);
220 			rlen = Boffset(bin) - off;
221 		}else
222 			error("can't handle compression method %d for %s", zh.meth, zh.file);
223 
224 		trailer(bin, &zh);
225 
226 		if(wantFile(zh.file)){
227 			if(verbose)
228 				print("%-32s %10lud %s", zh.file, zh.uncsize, ctime(msdos2time(zh.modtime, zh.moddate)));
229 			else
230 				print("%s\n", zh.file);
231 
232 			if(verbose > 1){
233 				print("\textract by os %d vers %d.%d\n", zh.extos, zh.extvers / 10, zh.extvers % 10);
234 				print("\tflags %x\n", zh.flags);
235 				print("\tmethod %d\n", zh.meth);
236 				print("\tmod time %d\n", zh.modtime);
237 				print("\tmod date %d\n", zh.moddate);
238 				print("\tcrc %lux\n", zh.crc);
239 				print("\tcompressed size %lud\n", zh.csize);
240 				print("\tuncompressed size %lud\n", zh.uncsize);
241 				if((zh.flags & ZTrailInfo) && (hcrc || hcsize || huncsize)){
242 					print("\theader crc %lux\n", zh.crc);
243 					print("\theader compressed size %lud\n", zh.csize);
244 					print("\theader uncompressed size %lud\n", zh.uncsize);
245 				}
246 			}
247 		}
248 
249 		if(zh.crc != crc)
250 			error("crc mismatch for %s", zh.file);
251 		if(zh.uncsize != wlen)
252 			error("output size mismatch for %s", zh.file);
253 		if(zh.csize != rlen)
254 			error("input size mismatch for %s", zh.file);
255 
256 
257 		free(zh.file);
258 		zh.file = nil;
259 	}
260 
261 	return ok;
262 }
263 
264 /*
265  * extract files using the info in the central directory structure
266  */
267 static int
268 unzip(Biobuf *bin, char *file)
269 {
270 	ZipHead zh;
271 	vlong off;
272 	int ok, eok, entries;
273 
274 	entries = findCDir(bin, file);
275 	if(entries < 0)
276 		return 0;
277 
278 	ok = 1;
279 	while(entries-- > 0){
280 		if(setjmp(zjmp)){
281 			free(zh.file);
282 			return 0;
283 		}
284 		memset(&zh, 0, sizeof(zh));
285 		if(!cheader(bin, &zh))
286 			return ok;
287 
288 
289 		off = Boffset(bin);
290 		if(wantFile(zh.file)){
291 			if(Bseek(bin, zh.off, 0) < 0){
292 				fprint(2, "unzip: can't seek to start of %s, skipping\n", zh.file);
293 				ok = 0;
294 			}else{
295 				eok = unzipEntry(bin, &zh);
296 				if(eok <= 0){
297 					fprint(2, "unzip: skipping %s\n", zh.file);
298 					ok = 0;
299 				}
300 			}
301 		}
302 
303 		free(zh.file);
304 		zh.file = nil;
305 
306 		if(Bseek(bin, off, 0) < 0){
307 			fprint(2, "unzip: can't seek to start of next entry, terminating extraction\n");
308 			return 0;
309 		}
310 	}
311 
312 	return ok;
313 }
314 
315 /*
316  * extract files using the info the "local file headers"
317  */
318 static int
319 sunzip(Biobuf *bin)
320 {
321 	int eok;
322 
323 	for(;;){
324 		eok = unzipEntry(bin, nil);
325 		if(eok == 0)
326 			return 1;
327 		if(eok < 0)
328 			return 0;
329 	}
330 
331 	return 1;
332 }
333 
334 /*
335  * extracts a single entry from a zip file
336  * czh is the optional corresponding central directory entry
337  */
338 static int
339 unzipEntry(Biobuf *bin, ZipHead *czh)
340 {
341 	Dir d;
342 	ZipHead zh;
343 	char *p;
344 	vlong off;
345 	int fd, isdir, ok;
346 
347 	zh.file = nil;
348 	if(setjmp(zjmp)){
349 		delfile = nil;
350 		free(zh.file);
351 		return -1;
352 	}
353 
354 	memset(&zh, 0, sizeof(zh));
355 	if(!header(bin, &zh))
356 		return 0;
357 
358 	ok = 1;
359 	isdir = 0;
360 
361 	fd = -1;
362 	if(wantFile(zh.file)){
363 		if(verbose)
364 			fprint(2, "extracting %s\n", zh.file);
365 
366 		if(czh != nil && czh->extos == ZDos){
367 			isdir = czh->eattr & ZDDir;
368 			if(isdir && zh.uncsize != 0)
369 				fprint(2, "unzip: ignoring directory data for %s\n", zh.file);
370 		}else if(zh.meth == 0 && zh.uncsize == 0){
371 			p = strchr(zh.file, '\0');
372 			if(p > zh.file && p[-1] == '/')
373 				isdir = 1;
374 		}
375 
376 		if(stdout){
377 			if(ok && !isdir)
378 				fd = 1;
379 		}else if(isdir){
380 			fd = create(zh.file, OREAD, CHDIR | 0775);
381 			if(fd < 0){
382 				if(dirstat(zh.file, &d) < 0 || (d.mode & CHDIR) != CHDIR){
383 					fprint(2, "unzip: can't create directory %s: %r\n", zh.file);
384 					ok = 0;
385 				}
386 			}
387 		}else if(ok){
388 			fd = create(zh.file, OWRITE, 0664);
389 			if(fd < 0){
390 				fprint(2, "unzip: can't create %s: %r\n", zh.file);
391 				ok = 0;
392 			}else
393 				delfile = zh.file;
394 		}
395 	}
396 
397 	wlen = 0;
398 	rlen = 0;
399 	crc = 0;
400 	wbad = 0;
401 
402 	if(zh.meth == 0){
403 		if(!copyout(fd, bin, zh.csize))
404 			error("copying data for %s failed: %r", zh.file);
405 	}else if(zh.meth == 8){
406 		off = Boffset(bin);
407 		if(!inflate((void*)fd, crcwrite, bin, (int(*)(void*))Bgetc))
408 			error("inflate failed: %r");
409 		rlen = Boffset(bin) - off;
410 	}else
411 		error("can't handle compression method %d for %s", zh.meth, zh.file);
412 
413 	trailer(bin, &zh);
414 
415 	if(zh.crc != crc)
416 		error("crc mismatch for %s", zh.file);
417 	if(zh.uncsize != wlen)
418 		error("output size mismatch for %s", zh.file);
419 	if(zh.csize != rlen)
420 		error("input size mismatch for %s", zh.file);
421 
422 	delfile = nil;
423 	free(zh.file);
424 
425 	if(fd >= 0 && !stdout){
426 		if(settimes){
427 			if(dirfstat(fd, &d) >= 0){
428 				d.mtime = msdos2time(zh.modtime, zh.moddate);
429 				if(d.mtime)
430 					dirfwstat(fd, &d);
431 			}
432 		}
433 		close(fd);
434 	}
435 
436 	return ok;
437 }
438 
439 static int
440 wantFile(char *file)
441 {
442 	int i, n;
443 
444 	if(nwant == 0)
445 		return 1;
446 	for(i = 0; i < nwant; i++){
447 		if(strcmp(want[i], file) == 0)
448 			return 1;
449 		n = strlen(want[i]);
450 		if(strncmp(want[i], file, n) == 0 && file[n] == '/')
451 			return 1;
452 	}
453 	return 0;
454 }
455 
456 /*
457  * find the start of the central directory
458  * returns the number of entries in the directory,
459  * or -1 if there was an error
460  */
461 static int
462 findCDir(Biobuf *bin, char *file)
463 {
464 	vlong ecoff;
465 	long off, size;
466 	int entries, zclen, dn, ds, de;
467 
468 	ecoff = Bseek(bin, -ZECHeadSize, 2);
469 	if(ecoff < 0){
470 		fprint(2, "unzip: can't seek to contents of %s; try adding -s\n", file);
471 		return -1;
472 	}
473 	if(setjmp(zjmp))
474 		return -1;
475 
476 	if(get4(bin) != ZECHeader){
477 		fprint(2, "unzip: bad magic number for contents of %s\n", file);
478 		return -1;
479 	}
480 	dn = get2(bin);
481 	ds = get2(bin);
482 	de = get2(bin);
483 	entries = get2(bin);
484 	size = get4(bin);
485 	off = get4(bin);
486 	zclen = get2(bin);
487 	while(zclen-- > 0)
488 		get1(bin);
489 
490 	if(verbose > 1){
491 		print("table starts at %ld for %ld bytes\n", off, size);
492 		if(ecoff - size != off)
493 			print("\ttable should start at %lld-%ld=%lld\n", ecoff, size, ecoff-size);
494 		if(dn || ds || de != entries)
495 			print("\tcurrent disk=%d start disk=%d table entries on this disk=%d\n", dn, ds, de);
496 	}
497 
498 	if(Bseek(bin, off, 0) != off){
499 		fprint(2, "unzip: can't seek to start of contents of %s\n", file);
500 		return -1;
501 	}
502 
503 	return entries;
504 }
505 
506 static int
507 cheader(Biobuf *bin, ZipHead *zh)
508 {
509 	ulong v;
510 	int flen, xlen, fclen;
511 
512 	v = get4(bin);
513 	if(v != ZCHeader){
514 		if(v == ZECHeader)
515 			return 0;
516 		error("bad magic number %lux", v);
517 	}
518 	zh->madevers = get1(bin);
519 	zh->madeos = get1(bin);
520 	zh->extvers = get1(bin);
521 	zh->extos = get1(bin);
522 	zh->flags = get2(bin);
523 	zh->meth = get2(bin);
524 	zh->modtime = get2(bin);
525 	zh->moddate = get2(bin);
526 	zh->crc = get4(bin);
527 	zh->csize = get4(bin);
528 	zh->uncsize = get4(bin);
529 	flen = get2(bin);
530 	xlen = get2(bin);
531 	fclen = get2(bin);
532 	get2(bin);		/* disk number start */
533 	zh->iattr = get2(bin);
534 	zh->eattr = get4(bin);
535 	zh->off = get4(bin);
536 
537 	zh->file = getname(bin, flen);
538 
539 	while(xlen-- > 0)
540 		get1(bin);
541 
542 	while(fclen-- > 0)
543 		get1(bin);
544 
545 	return 1;
546 }
547 
548 static int
549 header(Biobuf *bin, ZipHead *zh)
550 {
551 	ulong v;
552 	int flen, xlen;
553 
554 	v = get4(bin);
555 	if(v != ZHeader){
556 		if(v == ZCHeader)
557 			return 0;
558 		error("bad magic number %lux at %lld", v, Boffset(bin)-4);
559 	}
560 	zh->extvers = get1(bin);
561 	zh->extos = get1(bin);
562 	zh->flags = get2(bin);
563 	zh->meth = get2(bin);
564 	zh->modtime = get2(bin);
565 	zh->moddate = get2(bin);
566 	zh->crc = get4(bin);
567 	zh->csize = get4(bin);
568 	zh->uncsize = get4(bin);
569 	flen = get2(bin);
570 	xlen = get2(bin);
571 
572 	zh->file = getname(bin, flen);
573 
574 	while(xlen-- > 0)
575 		get1(bin);
576 
577 	return 1;
578 }
579 
580 static void
581 trailer(Biobuf *bin, ZipHead *zh)
582 {
583 	if(zh->flags & ZTrailInfo){
584 		zh->crc = get4(bin);
585 		zh->csize = get4(bin);
586 		zh->uncsize = get4(bin);
587 	}
588 }
589 
590 static char*
591 getname(Biobuf *bin, int len)
592 {
593 	char *s;
594 	int i, c;
595 
596 	s = emalloc(len + 1);
597 	for(i = 0; i < len; i++){
598 		c = get1(bin);
599 		if(lower)
600 			c = tolower(c);
601 		s[i] = c;
602 	}
603 	s[i] = '\0';
604 	return s;
605 }
606 
607 static int
608 crcwrite(void *out, void *buf, int n)
609 {
610 	int fd, nw;
611 
612 	wlen += n;
613 	crc = blockcrc(crc, buf, n);
614 	fd = (int)out;
615 	if(fd < 0)
616 		return n;
617 	nw = write(fd, buf, n);
618 	if(nw != n)
619 		wbad = 1;
620 	return nw;
621 }
622 
623 static int
624 copyout(int ofd, Biobuf *bin, long len)
625 {
626 	char buf[BufSize];
627 	int n;
628 
629 	for(; len > 0; len -= n){
630 		n = len;
631 		if(n > BufSize)
632 			n = BufSize;
633 		n = Bread(bin, buf, n);
634 		if(n <= 0)
635 			return 0;
636 		rlen += n;
637 		if(crcwrite((void*)ofd, buf, n) != n)
638 			return 0;
639 	}
640 	return 1;
641 }
642 
643 static ulong
644 get4(Biobuf *b)
645 {
646 	ulong v;
647 	int i, c;
648 
649 	v = 0;
650 	for(i = 0; i < 4; i++){
651 		c = Bgetc(b);
652 		if(c < 0)
653 			error("unexpected eof reading file information");
654 		v |= c << (i * 8);
655 	}
656 	return v;
657 }
658 
659 static int
660 get2(Biobuf *b)
661 {
662 	int i, c, v;
663 
664 	v = 0;
665 	for(i = 0; i < 2; i++){
666 		c = Bgetc(b);
667 		if(c < 0)
668 			error("unexpected eof reading file information");
669 		v |= c << (i * 8);
670 	}
671 	return v;
672 }
673 
674 static int
675 get1(Biobuf *b)
676 {
677 	int c;
678 
679 	c = Bgetc(b);
680 	if(c < 0)
681 		error("unexpected eof reading file information");
682 	return c;
683 }
684 
685 static long
686 msdos2time(int time, int date)
687 {
688 	Tm tm;
689 
690 	tm.hour = time >> 11;
691 	tm.min = (time >> 5) & 63;
692 	tm.sec = (time & 31) << 1;
693 	tm.year = 80 + (date >> 9);
694 	tm.mon = ((date >> 5) & 15) - 1;
695 	tm.mday = date & 31;
696 	tm.zone[0] = '\0';
697 
698 	return tm2sec(&tm);
699 }
700 
701 static void*
702 emalloc(ulong n)
703 {
704 	void *p;
705 
706 	p = malloc(n);
707 	if(p == nil)
708 		sysfatal("out of memory");
709 	return p;
710 }
711 
712 static void
713 error(char *fmt, ...)
714 {
715 	char buf[1024];
716 	va_list arg;
717 
718 	va_start(arg, fmt);
719 	doprint(buf, buf+sizeof(buf), fmt, arg);
720 	va_end(arg);
721 	fprint(2, "unzip: %s\n", buf);
722 
723 	if(delfile != nil){
724 		fprint(2, "unzip: removing output file %s\n", delfile);
725 		remove(delfile);
726 		delfile = nil;
727 	}
728 
729 	longjmp(zjmp, 1);
730 }
731