xref: /plan9/sys/src/cmd/tar.c (revision 60014d6756a98ad10929607ca84a1b7488a16cfc)
1 /*
2  * tar - `tape archiver', actually usable on any medium.
3  *	POSIX "ustar" compliant when extracting, and by default when creating.
4  *	this tar attempts to read and write multiple Tblock-byte blocks
5  *	at once to and from the filesystem, and does not copy blocks
6  *	around internally.
7  */
8 
9 #include <u.h>
10 #include <libc.h>
11 #include <fcall.h>		/* for %M */
12 #include <String.h>
13 
14 /*
15  * modified versions of those in libc.h; scans only the first arg for
16  * keyletters and options.
17  */
/*
 * TARGBEGIN sets argv0 if it is unset, steps past the program name and,
 * if an argument remains, walks that one argument rune by rune.  The
 * `case' labels written between TARGBEGIN and TARGEND become the body of
 * the switch on the current rune.
 */
18 #define	TARGBEGIN {\
19 	(argv0 || (argv0 = *argv)), argv++, argc--;\
20 	if (argv[0]) {\
21 		char *_args, *_argt;\
22 		Rune _argc;\
23 		_args = &argv[0][0];\
24 		_argc = 0;\
25 		while(*_args && (_args += chartorune(&_argc, _args)))\
26 			switch(_argc)
/* TARGEND closes what TARGBEGIN opened and steps past the scanned argument */
27 #define	TARGEND	SET(_argt); USED(_argt);USED(_argc);USED(_args); \
28 	argc--, argv++; } \
29 	USED(argv); USED(argc); }
/* the rune (keyletter) currently being examined inside TARGBEGIN/TARGEND */
30 #define	TARGC() (_argc)
31 
/* quotient of a/b rounded up, i.e. how many b-sized units are needed to hold a */
32 #define ROUNDUP(a, b)	(((a) + (b) - 1)/(b))
/* number of Tblock-byte blocks needed to hold `bytes' bytes */
33 #define BYTES2TBLKS(bytes) ROUNDUP(bytes, Tblock)
34 
35 /* read big-endian binary integers; args must be (uchar *) */
36 #define	G2BEBYTE(x)	(((x)[0]<<8)  |  (x)[1])
37 #define	G3BEBYTE(x)	(((x)[0]<<16) | ((x)[1]<<8)  |  (x)[2])
38 #define	G4BEBYTE(x)	(((x)[0]<<24) | ((x)[1]<<16) | ((x)[2]<<8) | (x)[3])
/* x[0..3] supply the high 32 bits of the result, x[4..7] the low 32 bits */
39 #define	G8BEBYTE(x)	(((vlong)G4BEBYTE(x)<<32) | (u32int)G4BEBYTE((x)+4))
40 
/* type used in this file for seek offsets and for member sizes */
41 typedef vlong Off;
/* shape of a buffer-refilling routine handed to getblk(); refill() is one */
42 typedef char *(*Refill)(int ar, char *bufs, int justhdr);
43 
44 enum { Stdin, Stdout, Stderr };		/* file descriptors 0, 1 and 2 */
45 enum { Rd, Wr };			/* pipe fd-array indices */
46 enum { Output, Input };			/* values for push()'s `input' argument */
47 enum { None, Toc, Xtract, Replace };	/* values of `verb', chosen by keyletter */
48 enum { Alldata, Justnxthdr };		/* values for the `justhdr' arguments */
49 enum {
50 	Tblock = 512,		/* size in bytes of one Hdr / one tar block */
51 	Namsiz = 100,		/* size of the name and linkname header fields */
52 	Maxpfx = 155,		/* from POSIX */
53 	Maxname = Namsiz + 1 + Maxpfx,	/* prefix, a slash, and name */
54 	Binsize = 0x80,		/* flag in size[0], from gnu: positive binary size */
55 	Binnegsz = 0xff,	/* flag in size[0]: negative binary size */
56 
57 	Nblock = 40,		/* maximum blocksize */
58 	Dblock = 20,		/* default blocksize */
59 	Debug = 0,		/* non-zero enables the chdir/open traces on stderr */
60 };
61 
62 /* POSIX link flags */
63 enum {
64 	LF_PLAIN1 =	'\0',
65 	LF_PLAIN2 =	'0',
66 	LF_LINK =	'1',
67 	LF_SYMLINK1 =	'2',
68 	LF_SYMLINK2 =	's',		/* 4BSD used this */
69 	LF_CHR =	'3',
70 	LF_BLK =	'4',
71 	LF_DIR =	'5',
72 	LF_FIFO =	'6',
73 	LF_CONTIG =	'7',
74 	/* 'A' - 'Z' are reserved for custom implementations */
75 };
76 
/* predicates on a header's linkflag value; lf may be evaluated more than once */
77 #define islink(lf)	(isreallink(lf) || issymlink(lf))
78 #define isreallink(lf)	((lf) == LF_LINK)
79 #define issymlink(lf)	((lf) == LF_SYMLINK1 || (lf) == LF_SYMLINK2)
80 
/*
 * one Tblock-byte archive block, seen either as raw bytes (data) or as
 * the fields of a tar header.
 */
81 typedef union {
82 	uchar	data[Tblock];
83 	struct {
84 		char	name[Namsiz];
85 		char	mode[8];	/* written and parsed as octal text */
86 		char	uid[8];		/* written as octal text */
87 		char	gid[8];		/* written as octal text */
88 		char	size[12];	/* octal text, or binary if size[0] is Binsize */
89 		char	mtime[12];	/* written and parsed as octal text */
90 		char	chksum[8];	/* octal text; see chksum() */
91 		char	linkflag;	/* one of the LF_* values */
92 		char	linkname[Namsiz];
93 
94 		/* rest are defined by POSIX's ustar format; see p1003.2b */
95 		char	magic[6];	/* "ustar" */
96 		char	version[2];
97 		char	uname[32];
98 		char	gname[32];
99 		char	devmajor[8];
100 		char	devminor[8];
101 		char	prefix[Maxpfx]; /* if non-null, path= prefix "/" name */
102 	};
103 } Hdr;
104 
/* one compression method: its commands and the archive suffixes that select it */
105 typedef struct {
106 	char	*comp;		/* command run by replace() to compress output */
107 	char	*decomp;	/* command run by extract() to decompress input */
108 	char	*sfx[4];	/* archive-name suffixes; unused slots stay nil */
109 } Compress;
110 
/* compmethod() searches this table in order and falls back to its first entry */
111 static Compress comps[] = {
112 	"gzip",		"gunzip",	{ ".tar.gz", ".tgz" },	/* default */
113 	"compress",	"uncompress",	{ ".tar.Z",  ".tz" },
114 	"bzip2",	"bunzip2",	{ ".tar.bz", ".tbz",
115 					  ".tar.bz2",".tbz2" },
116 };
117 
/* bookkeeping filled in by push() and consumed by pushclose() */
118 typedef struct {
119 	int	kid;	/* value returned by fork() for the filter process */
120 	int	fd;	/* original fd */
121 	int	rfd;	/* replacement fd */
122 	int	input;	/* copy of push()'s `input' argument */
123 	int	open;	/* non-zero between a successful push() and pushclose() */
124 } Pushstate;
125 
/* the opposite pipe end: Wr for Rd, Rd for anything else */
126 #define OTHER(rdwr) (rdwr == Rd? Wr: Rd)
127 
128 static int debug;		/* not assigned anywhere in the visible source */
129 static int fixednblock;		/* not assigned in the visible source; if set, refill() skips block sizing */
130 static int verb;		/* None, Toc, Xtract or Replace */
131 static int posix = 1;		/* write ustar fields and allow prefix/name splitting; 'P' clears it */
132 static int docreate;		/* 'c': create the archive rather than append to it */
133 static int aruid;		/* 'u': uid number written into each header */
134 static int argid;		/* 'g': gid number written into each header */
135 static int relative = 1;	/* extract names starting with / or # under "."; 'R' clears it */
136 static int settime;		/* 'T': apply archive metadata via wrmeta(); 'm' clears it */
137 static int verbose;		/* 'v' */
138 static int docompress;		/* 'z' */
139 static int keepexisting;	/* 'k': don't overwrite files that already exist */
140 static int ignerrs;		/* flag: ignore i/o errors if possible */
141 static Off blkoff;		/* offset of the current archive block (not Tblock) */
142 static Off nexthdr;		/* running total advanced by readhdr() past each header and its data */
143 
144 static int nblock = Dblock;	/* Tblocks per archive block; refill() may change it */
145 static char *usefile, *arname = "archive";	/* 'f' argument (nil if none); name used in messages */
146 static char origdir[Maxname*2];	/* working directory at start; replace() returns here */
147 static Hdr *tpblk, *endblk;	/* start of, and one past the end of, the block buffer */
148 static Hdr *curblk;		/* next block getblk() will hand out */
149 
150 static void
151 usage(void)
152 {
153 	fprint(2, "usage: %s {crtx}[PRTfgikmpuvz] [archive] file1 file2...\n",
154 		argv0);
155 	exits("usage");
156 }
157 
158 /* I/O, with error retry or exit */
159 
160 static int
161 cope(char *name, int fd, void *buf, long len, Off off)
162 {
163 	fprint(2, "%s: %serror reading %s: %r\n", argv0,
164 		(ignerrs? "ignoring ": ""), name);
165 	if (!ignerrs)
166 		exits("read error");
167 
168 	/* pretend we read len bytes of zeroes */
169 	memset(buf, 0, len);
170 	if (off >= 0)			/* seekable? */
171 		seek(fd, off + len, 0);
172 	return len;
173 }
174 
175 static int
176 eread(char *name, int fd, void *buf, long len)
177 {
178 	int rd;
179 	Off off;
180 
181 	off = seek(fd, 0, 1);		/* for coping with errors */
182 	rd = read(fd, buf, len);
183 	if (rd < 0)
184 		rd = cope(name, fd, buf, len, off);
185 	return rd;
186 }
187 
188 static int
189 ereadn(char *name, int fd, void *buf, long len)
190 {
191 	int rd;
192 	Off off;
193 
194 	off = seek(fd, 0, 1);
195 	rd = readn(fd, buf, len);
196 	if (rd < 0)
197 		rd = cope(name, fd, buf, len, off);
198 	return rd;
199 }
200 
/*
 * write len bytes from buf to fd; anything other than a complete write
 * is fatal.  returns the number of bytes written (always len).
 */
static int
ewrite(char *name, int fd, void *buf, long len)
{
	int wrote;

	werrstr("");		/* clear stale error text before %r may print it */
	wrote = write(fd, buf, len);
	if (wrote == len)
		return wrote;
	sysfatal("error writing %s: %r", name);
	return wrote;
}
212 
213 /* compression */
214 
215 static Compress *
216 compmethod(char *name)
217 {
218 	int i, nmlen = strlen(name), sfxlen;
219 	Compress *cp;
220 
221 	for (cp = comps; cp < comps + nelem(comps); cp++)
222 		for (i = 0; i < nelem(cp->sfx) && cp->sfx[i]; i++) {
223 			sfxlen = strlen(cp->sfx[i]);
224 			if (nmlen > sfxlen &&
225 			    strcmp(cp->sfx[i], name + nmlen - sfxlen) == 0)
226 				return cp;
227 		}
228 	return docompress? comps: nil;
229 }
230 
231 /*
232  * push a filter, cmd, onto fd.  if input, it's an input descriptor.
233  * returns a descriptor to replace fd, or -1 on error.
234  */
235 static int
236 push(int fd, char *cmd, int input, Pushstate *ps)
237 {
238 	int nfd, pifds[2];
239 	String *s;
240 
241 	ps->open = 0;
242 	ps->fd = fd;
243 	ps->input = input;
244 	if (fd < 0 || pipe(pifds) < 0)
245 		return -1;
246 	ps->kid = fork();
247 	switch (ps->kid) {
248 	case -1:
249 		return -1;
250 	case 0:
251 		if (input)
252 			dup(pifds[Wr], Stdout);
253 		else
254 			dup(pifds[Rd], Stdin);
255 		close(pifds[input? Rd: Wr]);
256 		dup(fd, (input? Stdin: Stdout));
257 		s = s_new();
258 		if (cmd[0] != '/')
259 			s_append(s, "/bin/");
260 		s_append(s, cmd);
261 		execl(s_to_c(s), cmd, nil);
262 		sysfatal("can't exec %s: %r", cmd);
263 	default:
264 		nfd = pifds[input? Rd: Wr];
265 		close(pifds[input? Wr: Rd]);
266 		break;
267 	}
268 	ps->rfd = nfd;
269 	ps->open = 1;
270 	return nfd;
271 }
272 
273 static char *
274 pushclose(Pushstate *ps)
275 {
276 	Waitmsg *wm;
277 
278 	if (ps->fd < 0 || ps->rfd < 0 || !ps->open)
279 		return "not open";
280 	close(ps->rfd);
281 	ps->rfd = -1;
282 	ps->open = 0;
283 	while ((wm = wait()) != nil && wm->pid != ps->kid)
284 		continue;
285 	return wm? wm->msg: nil;
286 }
287 
288 /*
289  * block-buffer management
290  */
291 
292 static void
293 initblks(void)
294 {
295 	free(tpblk);
296 	tpblk = malloc(Tblock * nblock);
297 	assert(tpblk != nil);
298 	endblk = tpblk + nblock;
299 }
300 
301 /*
302  * (re)fill block buffers from archive.  `justhdr' means we don't care
303  * about the data before the next header block.
304  */
/*
 * returns bufs after filling (or skipping) one archive block of
 * Tblock*nblock bytes, or nil once an earlier call has seen a short read.
 */
305 static char *
306 refill(int ar, char *bufs, int justhdr)
307 {
308 	int i, n;
309 	unsigned bytes = Tblock * nblock;
310 	static int done, first = 1, seekable;
311 
	/* an earlier call got fewer bytes than asked for; nothing more to give */
312 	if (done)
313 		return nil;
314 
315 	blkoff = seek(ar, 0, 1);		/* note position for `tar r' */
316 	if (first)
317 		seekable = blkoff >= 0;
318 	/* try to size non-pipe input at first read */
319 	if (first && usefile && !fixednblock) {
		/* one plain read; its length becomes the blocking factor if it differs */
320 		n = eread(arname, ar, bufs, bytes);
321 		if (n == 0)
322 			sysfatal("EOF reading archive: %r");
323 		i = n;
324 		if (i % Tblock != 0)
325 			sysfatal("archive block size (%d) error", i);
326 		i /= Tblock;
327 		if (i != nblock) {
328 			nblock = i;
329 			fprint(2, "%s: blocking = %d\n", argv0, nblock);
330 			endblk = (Hdr *)bufs + nblock;
331 			bytes = n;
332 		}
333 	} else if (justhdr && seekable && nexthdr - blkoff >= bytes) {
334 		/* optimisation for huge archive members on seekable media */
		/* the data is not read, so bufs keeps whatever it held before */
335 		if (seek(ar, bytes, 1) < 0)
336 			sysfatal("can't seek on archive: %r");
337 		n = bytes;
338 	} else
339 		n = ereadn(arname, ar, bufs, bytes);
340 	first = 0;
341 
342 	if (n == 0)
343 		sysfatal("unexpected EOF reading archive");
344 	if (n % Tblock != 0)
345 		sysfatal("partial block read from archive");
	/* short read: zero the unread tail and remember that input is finished */
346 	if (n != bytes) {
347 		done = 1;
348 		memset(bufs + n, 0, bytes - n);
349 	}
350 	return bufs;
351 }
352 
353 static Hdr *
354 getblk(int ar, Refill rfp, int justhdr)
355 {
356 	if (curblk == nil || curblk >= endblk) {  /* input block exhausted? */
357 		if (rfp != nil && (*rfp)(ar, (char *)tpblk, justhdr) == nil)
358 			return nil;
359 		curblk = tpblk;
360 	}
361 	return curblk++;
362 }
363 
364 static Hdr *
365 getblkrd(int ar, int justhdr)
366 {
367 	return getblk(ar, refill, justhdr);
368 }
369 
370 static Hdr *
371 getblke(int ar)
372 {
373 	return getblk(ar, nil, Alldata);
374 }
375 
376 static Hdr *
377 getblkz(int ar)
378 {
379 	Hdr *hp = getblke(ar);
380 
381 	if (hp != nil)
382 		memset(hp->data, 0, Tblock);
383 	return hp;
384 }
385 
386 /*
387  * how many block buffers are available, starting at the address
388  * just returned by getblk*?
389  */
390 static int
391 gothowmany(int max)
392 {
393 	int n = endblk - (curblk - 1);
394 
395 	return n > max? max: n;
396 }
397 
398 /*
399  * indicate that one is done with the last block obtained from getblke
400  * and it is now available to be written into the archive.
401  */
402 static void
403 putlastblk(int ar)
404 {
405 	unsigned bytes = Tblock * nblock;
406 
407 	/* if writing end-of-archive, aid compression (good hygiene too) */
408 	if (curblk < endblk)
409 		memset(curblk, 0, (char *)endblk - (char *)curblk);
410 	ewrite(arname, ar, tpblk, bytes);
411 }
412 
413 static void
414 putblk(int ar)
415 {
416 	if (curblk >= endblk)
417 		putlastblk(ar);
418 }
419 
420 static void
421 putbackblk(int ar)
422 {
423 	curblk--;
424 	USED(ar);
425 }
426 
427 static void
428 putreadblks(int ar, int blks)
429 {
430 	curblk += blks - 1;
431 	USED(ar);
432 }
433 
434 static void
435 putblkmany(int ar, int blks)
436 {
437 	assert(blks > 0);
438 	curblk += blks - 1;
439 	putblk(ar);
440 }
441 
442 /*
443  * common routines
444  */
445 
446 /*
447  * modifies hp->chksum but restores it; important for the last block of the
448  * old archive when updating with `tar rf archive'
449  */
450 static long
451 chksum(Hdr *hp)
452 {
453 	int n = Tblock;
454 	long i = 0;
455 	uchar *cp = hp->data;
456 	char oldsum[sizeof hp->chksum];
457 
458 	memmove(oldsum, hp->chksum, sizeof oldsum);
459 	memset(hp->chksum, ' ', sizeof hp->chksum);
460 	while (n-- > 0)
461 		i += *cp++;
462 	memmove(hp->chksum, oldsum, sizeof oldsum);
463 	return i;
464 }
465 
466 static int
467 isustar(Hdr *hp)
468 {
469 	return strcmp(hp->magic, "ustar") == 0;
470 }
471 
/*
 * length of the string in s, which occupies at most n bytes and need not
 * be NUL-terminated: the offset of the first NUL within the first n
 * bytes, or n if there is none.  bytes after the first NUL are ignored,
 * so a field with junk after its terminator is still measured correctly,
 * and n <= 0 yields 0 without touching s.
 */
static int
strnlen(char *s, int n)
{
	char *nul;

	if (n <= 0)
		return 0;
	nul = memchr(s, '\0', n);
	return nul? nul - s: n;
}
482 
483 /* set fullname from header */
484 static char *
485 name(Hdr *hp)
486 {
487 	int pfxlen, namlen;
488 	static char fullnamebuf[2+Maxname+1];  /* 2+ for ./ on relative names */
489 	char *fullname;
490 
491 	fullname = fullnamebuf+2;
492 	namlen = strnlen(hp->name, sizeof hp->name);
493 	if (hp->prefix[0] == '\0' || !isustar(hp)) {	/* old-style name? */
494 		memmove(fullname, hp->name, namlen);
495 		fullname[namlen] = '\0';
496 		return fullname;
497 	}
498 
499 	/* name is in two pieces */
500 	pfxlen = strnlen(hp->prefix, sizeof hp->prefix);
501 	memmove(fullname, hp->prefix, pfxlen);
502 	fullname[pfxlen] = '/';
503 	memmove(fullname + pfxlen + 1, hp->name, namlen);
504 	fullname[pfxlen + 1 + namlen] = '\0';
505 	return fullname;
506 }
507 
508 static int
509 isdir(Hdr *hp)
510 {
511 	/* the mode test is ugly but sometimes necessary */
512 	return hp->linkflag == LF_DIR ||
513 		strrchr(name(hp), '\0')[-1] == '/' ||
514 		(strtoul(hp->mode, nil, 8)&0170000) == 040000;
515 }
516 
517 static int
518 eotar(Hdr *hp)
519 {
520 	return name(hp)[0] == '\0';
521 }
522 
523 /*
524 static uvlong
525 getbe(uchar *src, int size)
526 {
527 	uvlong vl = 0;
528 
529 	while (size-- > 0) {
530 		vl <<= 8;
531 		vl |= *src++;
532 	}
533 	return vl;
534 }
535  */
536 
537 static void
538 putbe(uchar *dest, uvlong vl, int size)
539 {
540 	for (dest += size; size-- > 0; vl >>= 8)
541 		*--dest = vl;
542 }
543 
544 /*
545  * return the nominal size from the header block, which is not always the
546  * size in the archive (the archive size may be zero for some file types
547  * regardless of the nominal size).
548  *
549  * gnu and freebsd tars are now recording vlongs as big-endian binary
550  * with a flag in byte 0 to indicate this, which permits file sizes up to
551  * 2^64-1 (actually 2^80-1 but our file sizes are vlongs) rather than 2^33-1.
552  */
553 static Off
554 hdrsize(Hdr *hp)
555 {
556 	uchar *p;
557 
558 	if((uchar)hp->size[0] == Binnegsz) {
559 		fprint(2, "%s: %s: negative length, which is insane\n",
560 			argv0, name(hp));
561 		return 0;
562 	} else if((uchar)hp->size[0] == Binsize) {
563 		p = (uchar *)hp->size + sizeof hp->size - 1 -
564 			sizeof(vlong);		/* -1 for terminating space */
565 		return G8BEBYTE(p);
566 	} else
567 		return strtoull(hp->size, nil, 8);
568 }
569 
570 /*
571  * return the number of bytes recorded in the archive.
572  */
573 static Off
574 arsize(Hdr *hp)
575 {
576 	if(isdir(hp) || islink(hp->linkflag))
577 		return 0;
578 	return hdrsize(hp);
579 }
580 
581 static Hdr *
582 readhdr(int ar)
583 {
584 	long hdrcksum;
585 	Hdr *hp;
586 
587 	hp = getblkrd(ar, Alldata);
588 	if (hp == nil)
589 		sysfatal("unexpected EOF instead of archive header");
590 	if (eotar(hp))			/* end-of-archive block? */
591 		return nil;
592 	hdrcksum = strtoul(hp->chksum, nil, 8);
593 	if (chksum(hp) != hdrcksum)
594 		sysfatal("bad archive header checksum: name %.64s...",
595 			hp->name);
596 	nexthdr += Tblock*(1 + BYTES2TBLKS(arsize(hp)));
597 	return hp;
598 }
599 
600 /*
601  * tar r[c]
602  */
603 
604 /*
605  * if name is longer than Namsiz bytes, try to split it at a slash and fit the
606  * pieces into hp->prefix and hp->name.
607  */
608 static int
609 putfullname(Hdr *hp, char *name)
610 {
611 	int namlen, pfxlen;
612 	char *sl, *osl;
613 	String *slname = nil;
614 
615 	if (isdir(hp)) {
616 		slname = s_new();
617 		s_append(slname, name);
618 		s_append(slname, "/");		/* posix requires this */
619 		name = s_to_c(slname);
620 	}
621 
622 	namlen = strlen(name);
623 	if (namlen <= Namsiz) {
624 		strncpy(hp->name, name, Namsiz);
625 		hp->prefix[0] = '\0';		/* ustar paranoia */
626 		return 0;
627 	}
628 
629 	if (!posix || namlen > Maxname) {
630 		fprint(2, "%s: name too long for tar header: %s\n",
631 			argv0, name);
632 		return -1;
633 	}
634 	/*
635 	 * try various splits until one results in pieces that fit into the
636 	 * appropriate fields of the header.  look for slashes from right
637 	 * to left, in the hopes of putting the largest part of the name into
638 	 * hp->prefix, which is larger than hp->name.
639 	 */
640 	sl = strrchr(name, '/');
641 	while (sl != nil) {
642 		pfxlen = sl - name;
643 		if (pfxlen <= sizeof hp->prefix && namlen-1 - pfxlen <= Namsiz)
644 			break;
645 		osl = sl;
646 		*osl = '\0';
647 		sl = strrchr(name, '/');
648 		*osl = '/';
649 	}
650 	if (sl == nil) {
651 		fprint(2, "%s: name can't be split to fit tar header: %s\n",
652 			argv0, name);
653 		return -1;
654 	}
655 	*sl = '\0';
656 	strncpy(hp->prefix, name, sizeof hp->prefix);
657 	*sl++ = '/';
658 	strncpy(hp->name, sl, sizeof hp->name);
659 	if (slname)
660 		s_free(slname);
661 	return 0;
662 }
663 
664 static int
665 mkhdr(Hdr *hp, Dir *dir, char *file)
666 {
667 	/*
668 	 * some of these fields run together, so we format them left-to-right
669 	 * and don't use snprint.
670 	 */
671 	sprint(hp->mode, "%6lo ", dir->mode & 0777);
672 	sprint(hp->uid, "%6o ", aruid);
673 	sprint(hp->gid, "%6o ", argid);
674 	if (dir->length >= (Off)1<<32) {
675 		static int printed;
676 
677 		if (!printed) {
678 			printed = 1;
679 			fprint(2, "%s: storing large sizes in \"base 256\"\n", argv0);
680 		}
681 		hp->size[0] = Binsize;
682 		/* emit so-called `base 256' representation of size */
683 		putbe((uchar *)hp->size+1, dir->length, sizeof hp->size - 2);
684 		hp->size[sizeof hp->size - 1] = ' ';
685 	} else
686 		sprint(hp->size, "%11lluo ", dir->length);
687 	sprint(hp->mtime, "%11luo ", dir->mtime);
688 	hp->linkflag = (dir->mode&DMDIR? LF_DIR: LF_PLAIN1);
689 	putfullname(hp, file);
690 	if (posix) {
691 		strncpy(hp->magic, "ustar", sizeof hp->magic);
692 		strncpy(hp->version, "00", sizeof hp->version);
693 		strncpy(hp->uname, dir->uid, sizeof hp->uname);
694 		strncpy(hp->gname, dir->gid, sizeof hp->gname);
695 	}
696 	sprint(hp->chksum, "%6luo", chksum(hp));
697 	return 0;
698 }
699 
700 static void addtoar(int ar, char *file, char *shortf);
701 
702 static void
703 addtreetoar(int ar, char *file, char *shortf, int fd)
704 {
705 	int n;
706 	Dir *dent, *dirents;
707 	String *name = s_new();
708 
709 	n = dirreadall(fd, &dirents);
710 	if (n < 0)
711 		fprint(2, "%s: dirreadall %s: %r\n", argv0, file);
712 	close(fd);
713 	if (n <= 0)
714 		return;
715 
716 	if (chdir(shortf) < 0)
717 		sysfatal("chdir %s: %r", file);
718 	if (Debug)
719 		fprint(2, "chdir %s\t# %s\n", shortf, file);
720 
721 	for (dent = dirents; dent < dirents + n; dent++) {
722 		s_reset(name);
723 		s_append(name, file);
724 		s_append(name, "/");
725 		s_append(name, dent->name);
726 		addtoar(ar, s_to_c(name), dent->name);
727 	}
728 	s_free(name);
729 	free(dirents);
730 
731 	/*
732 	 * this assumes that shortf is just one component, which is true
733 	 * during directory descent, but not necessarily true of command-line
734 	 * arguments.  Our caller (or addtoar's) must reset the working
735 	 * directory if necessary.
736 	 */
737 	if (chdir("..") < 0)
738 		sysfatal("chdir %s/..: %r", file);
739 	if (Debug)
740 		fprint(2, "chdir ..\n");
741 }
742 
743 static void
744 addtoar(int ar, char *file, char *shortf)
745 {
746 	int n, fd, isdir;
747 	long bytes, blksread;
748 	ulong blksleft;
749 	Hdr *hbp;
750 	Dir *dir;
751 	String *name = nil;
752 
753 	if (shortf[0] == '#') {
754 		name = s_new();
755 		s_append(name, "./");
756 		s_append(name, shortf);
757 		shortf = s_to_c(name);
758 	}
759 
760 	if (Debug)
761 		fprint(2, "opening %s	# %s\n", shortf, file);
762 	fd = open(shortf, OREAD);
763 	if (fd < 0) {
764 		fprint(2, "%s: can't open %s: %r\n", argv0, file);
765 		if (name)
766 			s_free(name);
767 		return;
768 	}
769 	dir = dirfstat(fd);
770 	if (dir == nil)
771 		sysfatal("can't fstat %s: %r", file);
772 
773 	hbp = getblkz(ar);
774 	isdir = (dir->qid.type & QTDIR) != 0;
775 	if (mkhdr(hbp, dir, file) < 0) {
776 		putbackblk(ar);
777 		free(dir);
778 		close(fd);
779 		if (name)
780 			s_free(name);
781 		return;
782 	}
783 	putblk(ar);
784 
785 	blksleft = BYTES2TBLKS(dir->length);
786 	free(dir);
787 
788 	if (isdir)
789 		addtreetoar(ar, file, shortf, fd);
790 	else {
791 		for (; blksleft > 0; blksleft -= blksread) {
792 			hbp = getblke(ar);
793 			blksread = gothowmany(blksleft);
794 			assert(blksread >= 0);
795 			bytes = blksread * Tblock;
796 			n = ereadn(file, fd, hbp->data, bytes);
797 			assert(n >= 0);
798 			/*
799 			 * ignore EOF.  zero any partial block to aid
800 			 * compression and emergency recovery of data.
801 			 */
802 			if (n < Tblock)
803 				memset(hbp->data + n, 0, bytes - n);
804 			putblkmany(ar, blksread);
805 		}
806 		close(fd);
807 		if (verbose)
808 			fprint(2, "%s\n", file);
809 	}
810 	if (name)
811 		s_free(name);
812 }
813 
/*
 * create an archive, or append to an existing one, from the files named
 * in argv.  returns pushclose()'s result when a compressor was pushed,
 * otherwise nil.
 */
814 static char *
815 replace(char **argv)
816 {
817 	int i, ar;
818 	ulong blksleft, blksread;
819 	Off bytes;
820 	Hdr *hp;
821 	Compress *comp = nil;
822 	Pushstate ps;
823 
	/* a compressor is considered only when creating a named archive */
824 	if (usefile && docreate) {
825 		ar = create(usefile, OWRITE, 0666);
826 		if (docompress)
827 			comp = compmethod(usefile);
828 	} else if (usefile)
829 		ar = open(usefile, ORDWR);
830 	else
831 		ar = Stdout;
832 	if (comp)
833 		ar = push(ar, comp->comp, Output, &ps);
	/* covers a failed create or open as well as a failed push */
834 	if (ar < 0)
835 		sysfatal("can't open archive %s: %r", usefile);
836 
837 	if (usefile && !docreate) {
838 		/* skip quickly to the end */
839 		while ((hp = readhdr(ar)) != nil) {
840 			bytes = arsize(hp);
841 			for (blksleft = BYTES2TBLKS(bytes);
842 			     blksleft > 0 && getblkrd(ar, Justnxthdr) != nil;
843 			     blksleft -= blksread) {
844 				blksread = gothowmany(blksleft);
845 				putreadblks(ar, blksread);
846 			}
847 		}
848 		/*
849 		 * we have just read the end-of-archive Tblock.
850 		 * now seek back over the (big) archive block containing it,
851 		 * and back up curblk ptr over end-of-archive Tblock in memory.
852 		 */
853 		if (seek(ar, blkoff, 0) < 0)
854 			sysfatal("can't seek back over end-of-archive: %r");
855 		curblk--;
856 	}
857 
	/* each argument serves as both the archived name and the name to open */
858 	for (i = 0; argv[i] != nil; i++) {
859 		addtoar(ar, argv[i], argv[i]);
860 		chdir(origdir);		/* for correctness & profiling */
861 	}
862 
863 	/* write end-of-archive marker */
	/* two zeroed blocks; putlastblk then writes the buffer even if not full */
864 	getblkz(ar);
865 	putblk(ar);
866 	getblkz(ar);
867 	putlastblk(ar);
868 
869 	if (comp)
870 		return pushclose(&ps);
	/* descriptors 0, 1 and 2 are left open */
871 	if (ar > Stderr)
872 		close(ar);
873 	return nil;
874 }
875 
876 /*
877  * tar [xt]
878  */
879 
880 /* is pfx a file-name prefix of name? */
881 static int
882 prefix(char *name, char *pfx)
883 {
884 	int pfxlen = strlen(pfx);
885 	char clpfx[Maxname+1];
886 
887 	if (pfxlen > Maxname)
888 		return 0;
889 	strcpy(clpfx, pfx);
890 	cleanname(clpfx);
891 	return strncmp(clpfx, name, pfxlen) == 0 &&
892 		(name[pfxlen] == '\0' || name[pfxlen] == '/');
893 }
894 
895 static int
896 match(char *name, char **argv)
897 {
898 	int i;
899 	char clname[Maxname+1];
900 
901 	if (argv[0] == nil)
902 		return 1;
903 	strcpy(clname, name);
904 	cleanname(clname);
905 	for (i = 0; argv[i] != nil; i++)
906 		if (prefix(clname, argv[i]))
907 			return 1;
908 	return 0;
909 }
910 
911 static void
912 cantcreate(char *s, int mode)
913 {
914 	int len;
915 	static char *last;
916 
917 	/*
918 	 * Always print about files.  Only print about directories
919 	 * we haven't printed about.  (Assumes archive is ordered
920 	 * nicely.)
921 	 */
922 	if(mode&DMDIR){
923 		if(last){
924 			/* already printed this directory */
925 			if(strcmp(s, last) == 0)
926 				return;
927 			/* printed a higher directory, so printed this one */
928 			len = strlen(s);
929 			if(memcmp(s, last, len) == 0 && last[len] == '/')
930 				return;
931 		}
932 		/* save */
933 		free(last);
934 		last = strdup(s);
935 	}
936 	fprint(2, "%s: can't create %s: %r\n", argv0, s);
937 }
938 
939 static int
940 makedir(char *s)
941 {
942 	int f;
943 
944 	if (access(s, AEXIST) == 0)
945 		return -1;
946 	f = create(s, OREAD, DMDIR | 0777);
947 	if (f >= 0)
948 		close(f);
949 	else
950 		cantcreate(s, DMDIR);
951 	return f;
952 }
953 
954 static int
955 mkpdirs(char *s)
956 {
957 	int err;
958 	char *p;
959 
960 	p = s;
961 	err = 0;
962 	while (!err && (p = strchr(p+1, '/')) != nil) {
963 		*p = '\0';
964 		err = (access(s, AEXIST) < 0 && makedir(s) < 0);
965 		*p = '/';
966 	}
967 	return -err;
968 }
969 
970 /* Call access but preserve the error string. */
971 static int
972 xaccess(char *name, int mode)
973 {
974 	char err[ERRMAX];
975 	int rv;
976 
977 	err[0] = 0;
978 	errstr(err, sizeof err);
979 	rv = access(name, mode);
980 	errstr(err, sizeof err);
981 	return rv;
982 }
983 
984 static int
985 openfname(Hdr *hp, char *fname, int dir, int mode)
986 {
987 	int fd;
988 
989 	fd = -1;
990 	cleanname(fname);
991 	switch (hp->linkflag) {
992 	case LF_LINK:
993 	case LF_SYMLINK1:
994 	case LF_SYMLINK2:
995 		fprint(2, "%s: can't make (sym)link %s\n",
996 			argv0, fname);
997 		break;
998 	case LF_FIFO:
999 		fprint(2, "%s: can't make fifo %s\n", argv0, fname);
1000 		break;
1001 	default:
1002 		if (!keepexisting || access(fname, AEXIST) < 0) {
1003 			int rw = (dir? OREAD: OWRITE);
1004 
1005 			fd = create(fname, rw, mode);
1006 			if (fd < 0) {
1007 				mkpdirs(fname);
1008 				fd = create(fname, rw, mode);
1009 			}
1010 			if (fd < 0 && (!dir || xaccess(fname, AEXIST) < 0))
1011 			    	cantcreate(fname, mode);
1012 		}
1013 		if (fd >= 0 && verbose)
1014 			fprint(2, "%s\n", fname);
1015 		break;
1016 	}
1017 	return fd;
1018 }
1019 
1020 /* copy from archive to file system (or nowhere for table-of-contents) */
/*
 * consumes blksleft Tblocks of member data from the archive; at most
 * `bytes' bytes of it are written to fd when fd >= 0.
 */
1021 static void
1022 copyfromar(int ar, int fd, char *fname, ulong blksleft, Off bytes)
1023 {
1024 	int wrbytes;
1025 	ulong blksread;
1026 	Hdr *hbp;
1027 
1028 	if (blksleft == 0 || bytes < 0)
1029 		bytes = 0;
1030 	for (; blksleft > 0; blksleft -= blksread) {
		/* with no output file the data itself is not needed */
1031 		hbp = getblkrd(ar, (fd >= 0? Alldata: Justnxthdr));
1032 		if (hbp == nil)
1033 			sysfatal("unexpected EOF on archive extracting %s",
1034 				fname);
1035 		blksread = gothowmany(blksleft);
1036 		if (blksread <= 0) {
1037 			fprint(2, "%s: got %ld blocks reading %s!\n",
1038 				argv0, blksread, fname);
1039 			blksread = 0;
1040 		}
1041 		wrbytes = Tblock*blksread;
1042 		assert(bytes >= 0);
		/* the final block may be only partly data; don't write its padding */
1043 		if(wrbytes > bytes)
1044 			wrbytes = bytes;
1045 		assert(wrbytes >= 0);
1046 		if (fd >= 0)
1047 			ewrite(fname, fd, hbp->data, wrbytes);
1048 		putreadblks(ar, blksread);
1049 		bytes -= wrbytes;
1050 		assert(bytes >= 0);
1051 	}
1052 	if (bytes > 0)
1053 		fprint(2,
1054 	"%s: %lld bytes uncopied at EOF on archive; %s not fully extracted\n",
1055 			argv0, bytes, fname);
1056 }
1057 
1058 static void
1059 wrmeta(int fd, Hdr *hp, long mtime)		/* update metadata */
1060 {
1061 	Dir nd;
1062 
1063 	nulldir(&nd);
1064 	nd.mtime = mtime;
1065 	dirfwstat(fd, &nd);
1066 	if (isustar(hp)) {
1067 		nulldir(&nd);
1068 		nd.gid = hp->gname;
1069 		dirfwstat(fd, &nd);
1070 	}
1071 }
1072 
1073 /*
1074  * copy a file from the archive into the filesystem.
1075  * fname is result of name(), so has two extra bytes at beginning.
1076  */
/* handles one selected member: creates it (tar x) or lists it (tar t) */
1077 static void
1078 extract1(int ar, Hdr *hp, char *fname)
1079 {
1080 	int fd = -1, dir = 0;
1081 	long mtime = strtol(hp->mtime, nil, 8);
1082 	ulong mode = strtoul(hp->mode, nil, 8) & 0777;
1083 	Off bytes = hdrsize(hp);		/* for printing */
1084 	ulong blksleft = BYTES2TBLKS(arsize(hp));
1085 
1086 	/* fiddle name, figure out mode and blocks */
1087 	if (isdir(hp)) {
1088 		mode |= DMDIR|0700;
1089 		dir = 1;
1090 	}
	/* link and fifo members are treated as having no data blocks */
1091 	switch (hp->linkflag) {
1092 	case LF_LINK:
1093 	case LF_SYMLINK1:
1094 	case LF_SYMLINK2:
1095 	case LF_FIFO:
1096 		blksleft = 0;
1097 		break;
1098 	}
	/* prepend "." or "./" in place, using the two spare bytes before fname */
1099 	if (relative)
1100 		if(fname[0] == '/')
1101 			*--fname = '.';
1102 		else if(fname[0] == '#'){
1103 			*--fname = '/';
1104 			*--fname = '.';
1105 		}
1106 
	/* fd stays -1 unless a file is actually created for extraction */
1107 	if (verb == Xtract)
1108 		fd = openfname(hp, fname, dir, mode);
1109 	else if (verbose) {
1110 		char *cp = ctime(mtime);
1111 
1112 		print("%M %8lld %-12.12s %-4.4s %s\n",
1113 			mode, bytes, cp+4, cp+24, fname);
1114 	} else
1115 		print("%s\n", fname);
1116 
	/* always consume the member's blocks, even when nothing is written */
1117 	copyfromar(ar, fd, fname, blksleft, bytes);
1118 
1119 	/* touch up meta data and close */
1120 	if (fd >= 0) {
1121 		/*
1122 		 * directories should be wstated *after* we're done
1123 		 * creating files in them, but we don't do that.
1124 		 */
1125 		if (settime)
1126 			wrmeta(fd, hp, mtime);
1127 		close(fd);
1128 	}
1129 }
1130 
1131 static void
1132 skip(int ar, Hdr *hp, char *fname)
1133 {
1134 	ulong blksleft, blksread;
1135 	Hdr *hbp;
1136 
1137 	for (blksleft = BYTES2TBLKS(arsize(hp)); blksleft > 0;
1138 	     blksleft -= blksread) {
1139 		hbp = getblkrd(ar, Justnxthdr);
1140 		if (hbp == nil)
1141 			sysfatal("unexpected EOF on archive extracting %s",
1142 				fname);
1143 		blksread = gothowmany(blksleft);
1144 		putreadblks(ar, blksread);
1145 	}
1146 }
1147 
1148 static char *
1149 extract(char **argv)
1150 {
1151 	int ar;
1152 	char *longname;
1153 	Hdr *hp;
1154 	Compress *comp = nil;
1155 	Pushstate ps;
1156 
1157 	if (usefile) {
1158 		ar = open(usefile, OREAD);
1159 		comp = compmethod(usefile);
1160 	} else
1161 		ar = Stdin;
1162 	if (comp)
1163 		ar = push(ar, comp->decomp, Input, &ps);
1164 	if (ar < 0)
1165 		sysfatal("can't open archive %s: %r", usefile);
1166 
1167 	while ((hp = readhdr(ar)) != nil) {
1168 		longname = name(hp);
1169 		if (match(longname, argv))
1170 			extract1(ar, hp, longname);
1171 		else
1172 			skip(ar, hp, longname);
1173 	}
1174 
1175 	if (comp)
1176 		return pushclose(&ps);
1177 	if (ar > Stderr)
1178 		close(ar);
1179 	return nil;
1180 }
1181 
/*
 * parse the keyletter argument, then run the chosen operation and exit
 * with the string it returns (nil unless a pushed filter was used).
 */
1182 void
1183 main(int argc, char *argv[])
1184 {
1185 	int errflg = 0;
1186 	char *ret = nil;
1187 
	/* %M is used by the verbose table-of-contents listing */
1188 	fmtinstall('M', dirmodefmt);
1189 
	/* keyletters 'f', 'g' and 'u' take a value, fetched with EARGF */
1190 	TARGBEGIN {
1191 	case 'c':
1192 		docreate++;
1193 		verb = Replace;
1194 		break;
1195 	case 'f':
1196 		usefile = arname = EARGF(usage());
1197 		break;
1198 	case 'g':
1199 		argid = strtoul(EARGF(usage()), 0, 0);
1200 		break;
1201 	case 'i':
1202 		ignerrs = 1;
1203 		break;
1204 	case 'k':
1205 		keepexisting++;
1206 		break;
1207 	case 'm':	/* compatibility */
1208 		settime = 0;
1209 		break;
1210 	case 'p':
1211 		posix++;
1212 		break;
1213 	case 'P':
1214 		posix = 0;
1215 		break;
1216 	case 'r':
1217 		verb = Replace;
1218 		break;
1219 	case 'R':
1220 		relative = 0;
1221 		break;
1222 	case 't':
1223 		verb = Toc;
1224 		break;
1225 	case 'T':
1226 		settime++;
1227 		break;
1228 	case 'u':
1229 		aruid = strtoul(EARGF(usage()), 0, 0);
1230 		break;
1231 	case 'v':
1232 		verbose++;
1233 		break;
1234 	case 'x':
1235 		verb = Xtract;
1236 		break;
1237 	case 'z':
1238 		docompress++;
1239 		break;
	/* a '-' among the keyletters is accepted and ignored */
1240 	case '-':
1241 		break;
1242 	default:
1243 		fprint(2, "tar: unknown letter %C\n", TARGC());
1244 		errflg++;
1245 		break;
1246 	} TARGEND
1247 
1248 	if (argc < 0 || errflg)
1249 		usage();
1250 
1251 	initblks();
1252 	switch (verb) {
1253 	case Toc:
1254 	case Xtract:
1255 		ret = extract(argv);
1256 		break;
1257 	case Replace:
		/* replace() returns to origdir after each argument */
1258 		if (getwd(origdir, sizeof origdir) == nil)
1259 			strcpy(origdir, "/tmp");
1260 		ret = replace(argv);
1261 		break;
	/* no operation keyletter (c, r, t or x) was given */
1262 	default:
1263 		usage();
1264 		break;
1265 	}
1266 	exits(ret);
1267 }
1268