xref: /plan9/sys/src/cmd/tar.c (revision 6d2133e0d4be943e9ce2e416f00a3b046838334e)
1 /*
2  * tar - `tape archiver', actually usable on any medium.
3  *	POSIX "ustar" compliant when extracting, and by default when creating.
4  *	this tar attempts to read and write multiple Tblock-byte blocks
5  *	at once to and from the filesystem, and does not copy blocks
6  *	around internally.
7  */
8 
9 #include <u.h>
10 #include <libc.h>
11 #include <ctype.h>
12 #include <fcall.h>		/* for %M */
13 #include <String.h>
14 
15 /*
16  * modified versions of those in libc.h; scans only the first arg for
17  * keyletters and options.
 *
 * TARGBEGIN sets argv0 from argv[0] if it is not already set, steps past
 * argv[0] and, if another argument exists, runs a switch on each rune of
 * that argument in turn; the caller supplies the case labels between
 * TARGBEGIN and TARGEND.  TARGEND then steps past the keyletter argument.
 * TARGC() yields the rune currently being switched on.
18  */
19 #define	TARGBEGIN {\
20 	(argv0 || (argv0 = *argv)), argv++, argc--;\
21 	if (argv[0]) {\
22 		char *_args, *_argt;\
23 		Rune _argc;\
24 		_args = &argv[0][0];\
25 		_argc = 0;\
26 		while(*_args && (_args += chartorune(&_argc, _args)))\
27 			switch(_argc)
28 #define	TARGEND	SET(_argt); USED(_argt);USED(_argc);USED(_args); \
29 	argc--, argv++; } \
30 	USED(argv); USED(argc); }
31 #define	TARGC() (_argc)
32 
/*
 * ROUNDUP yields the number of b-sized units needed to hold a (a count,
 * not a rounded up to a multiple of b); BYTES2TBLKS is that count
 * measured in Tblock-byte blocks.
 */
33 #define ROUNDUP(a, b)	(((a) + (b) - 1)/(b))
34 #define BYTES2TBLKS(bytes) ROUNDUP(bytes, Tblock)
35 
36 /* read big-endian binary integers; args must be (uchar *) */
/* NOTE(review): (x)[0]<<24 shifts a promoted int into the sign bit when the top byte is >= 0x80 */
37 #define	G2BEBYTE(x)	(((x)[0]<<8)  |  (x)[1])
38 #define	G3BEBYTE(x)	(((x)[0]<<16) | ((x)[1]<<8)  |  (x)[2])
39 #define	G4BEBYTE(x)	(((x)[0]<<24) | ((x)[1]<<16) | ((x)[2]<<8) | (x)[3])
40 #define	G8BEBYTE(x)	(((vlong)G4BEBYTE(x)<<32) | (u32int)G4BEBYTE((x)+4))
41 
/* Off holds archive offsets and file sizes */
42 typedef vlong Off;
/* a Refill loads the block buffers at bufs from archive descriptor ar; refill() is the one used here */
43 typedef char *(*Refill)(int ar, char *bufs, int justhdr);
44 
45 enum { Stdin, Stdout, Stderr };
46 enum { Rd, Wr };			/* pipe fd-array indices */
47 enum { Output, Input };			/* values for the `input' argument of push() */
48 enum { None, Toc, Xtract, Replace };	/* values of verb */
49 enum { Alldata, Justnxthdr };		/* values for the `justhdr' arguments */
50 enum {
51 	Tblock = 512,
52 	Namsiz = 100,
53 	Maxpfx = 155,		/* from POSIX */
54 	Maxname = Namsiz + 1 + Maxpfx,
55 	Binsize = 0x80,		/* flag in size[0], from gnu: positive binary size */
56 	Binnegsz = 0xff,	/* flag in size[0]: negative binary size */
57 
58 	Nblock = 40,		/* maximum blocksize */
59 	Dblock = 20,		/* default blocksize */
60 	Debug = 0,
61 };
62 
63 /* POSIX link flags */
64 enum {
65 	LF_PLAIN1 =	'\0',
66 	LF_PLAIN2 =	'0',
67 	LF_LINK =	'1',
68 	LF_SYMLINK1 =	'2',
69 	LF_SYMLINK2 =	's',		/* 4BSD used this */
70 	LF_CHR =	'3',
71 	LF_BLK =	'4',
72 	LF_DIR =	'5',
73 	LF_FIFO =	'6',
74 	LF_CONTIG =	'7',
75 	/* 'A' - 'Z' are reserved for custom implementations */
76 };
77 
/* classify a header's linkflag byte: hard link, symbolic link, or either */
78 #define islink(lf)	(isreallink(lf) || issymlink(lf))
79 #define isreallink(lf)	((lf) == LF_LINK)
80 #define issymlink(lf)	((lf) == LF_SYMLINK1 || (lf) == LF_SYMLINK2)
81 
/*
 * one Tblock-byte archive block, viewed either as raw bytes (data) or
 * as the fields of a header block.  the numeric fields are text; see
 * mkhdr() for how they are written and hdrotoull() for how they are read.
 */
82 typedef union {
83 	uchar	data[Tblock];
84 	struct {
85 		char	name[Namsiz];
86 		char	mode[8];
87 		char	uid[8];
88 		char	gid[8];
89 		char	size[12];
90 		char	mtime[12];
91 		char	chksum[8];
92 		char	linkflag;
93 		char	linkname[Namsiz];
94 
95 		/* rest are defined by POSIX's ustar format; see p1003.2b */
96 		char	magic[6];	/* "ustar" */
97 		char	version[2];
98 		char	uname[32];
99 		char	gname[32];
100 		char	devmajor[8];
101 		char	devminor[8];
102 		char	prefix[Maxpfx]; /* if non-null, path= prefix "/" name */
103 	};
104 } Hdr;
105 
/*
 * a compression method: the filter commands used to pack (comp) and
 * unpack (decomp) an archive, and the archive-name suffixes that select
 * the method in compmethod().
 */
106 typedef struct {
107 	char	*comp;
108 	char	*decomp;
109 	char	*sfx[4];
110 } Compress;
111 
/* known methods; compmethod() falls back to the first entry when docompress is set */
112 static Compress comps[] = {
113 	"gzip",		"gunzip",	{ ".tar.gz", ".tgz" },	/* default */
114 	"compress",	"uncompress",	{ ".tar.Z",  ".tz" },
115 	"bzip2",	"bunzip2",	{ ".tar.bz", ".tbz",
116 					  ".tar.bz2",".tbz2" },
117 };
118 
/* record of a filter started by push(), consulted by pushclose() */
119 typedef struct {
120 	int	kid;	/* result of fork(): the filter's pid in the parent */
121 	int	fd;	/* original fd */
122 	int	rfd;	/* replacement fd */
123 	int	input;	/* the `input' argument given to push() */
124 	int	open;	/* non-zero between a successful push() and pushclose() */
125 } Pushstate;
126 
/* the opposite pipe end */
127 #define OTHER(rdwr) ((rdwr) == Rd? Wr: Rd)
128 
/* option state, mostly set from the keyletters parsed in main() */
129 static int debug;
130 static int fixednblock;
131 static int verb;		/* None, Toc, Xtract or Replace */
132 static int posix = 1;		/* cleared by P; permits prefix/name splitting and ustar fields */
133 static int docreate;		/* c: create the archive rather than append to it */
134 static int aruid;		/* u: uid written into headers */
135 static int argid;		/* g: gid written into headers */
136 static int relative = 1;	/* cleared by R; prefixes extracted names starting with / or # */
137 static int settime;		/* T: write metadata back after extracting */
138 static int verbose;		/* v */
139 static int docompress;		/* z */
140 static int keepexisting;	/* k: don't recreate files that already exist */
141 static int ignerrs;		/* flag: ignore i/o errors if possible */
142 static Off blkoff;		/* offset of the current archive block (not Tblock) */
143 static Off nexthdr;		/* running total advanced by readhdr() for each member */
144 
145 static int nblock = Dblock;	/* Tblocks per archive block */
146 static int resync;		/* s: search for the next good header after a bad checksum */
147 static char *usefile, *arname = "archive";
148 static char origdir[Maxname*2];	/* working directory at start; replace() returns here */
/* block buffers: tpblk is the first, endblk is one past the last, curblk is the next to hand out */
149 static Hdr *tpblk, *endblk;
150 static Hdr *curblk;
151 
152 static void
153 usage(void)
154 {
155 	fprint(2, "usage: %s {crtx}[PRTfgikmpsuvz] [archive] [file1 file2...]\n",
156 		argv0);
157 	exits("usage");
158 }
159 
160 /* I/O, with error retry or exit */
161 
162 static int
163 cope(char *name, int fd, void *buf, long len, Off off)
164 {
165 	fprint(2, "%s: %serror reading %s: %r\n", argv0,
166 		(ignerrs? "ignoring ": ""), name);
167 	if (!ignerrs)
168 		exits("read error");
169 
170 	/* pretend we read len bytes of zeroes */
171 	memset(buf, 0, len);
172 	if (off >= 0)			/* seekable? */
173 		seek(fd, off + len, 0);
174 	return len;
175 }
176 
177 static int
178 eread(char *name, int fd, void *buf, long len)
179 {
180 	int rd;
181 	Off off;
182 
183 	off = seek(fd, 0, 1);		/* for coping with errors */
184 	rd = read(fd, buf, len);
185 	if (rd < 0)
186 		rd = cope(name, fd, buf, len, off);
187 	return rd;
188 }
189 
190 static int
191 ereadn(char *name, int fd, void *buf, long len)
192 {
193 	int rd;
194 	Off off;
195 
196 	off = seek(fd, 0, 1);
197 	rd = readn(fd, buf, len);
198 	if (rd < 0)
199 		rd = cope(name, fd, buf, len, off);
200 	return rd;
201 }
202 
/*
 * write len bytes from buf to fd (called name); anything other than a
 * complete write is fatal.  returns the byte count written.
 */
static int
ewrite(char *name, int fd, void *buf, long len)
{
	int wrote;

	werrstr("");			/* so %r is not stale after a short write */
	wrote = write(fd, buf, len);
	if (wrote == len)
		return wrote;
	sysfatal("error writing %s: %r", name);
	return wrote;
}
214 
215 /* compression */
216 
217 static Compress *
218 compmethod(char *name)
219 {
220 	int i, nmlen = strlen(name), sfxlen;
221 	Compress *cp;
222 
223 	for (cp = comps; cp < comps + nelem(comps); cp++)
224 		for (i = 0; i < nelem(cp->sfx) && cp->sfx[i]; i++) {
225 			sfxlen = strlen(cp->sfx[i]);
226 			if (nmlen > sfxlen &&
227 			    strcmp(cp->sfx[i], name + nmlen - sfxlen) == 0)
228 				return cp;
229 		}
230 	return docompress? comps: nil;
231 }
232 
233 /*
234  * push a filter, cmd, onto fd.  if input, it's an input descriptor.
235  * returns a descriptor to replace fd, or -1 on error.
236  */
237 static int
238 push(int fd, char *cmd, int input, Pushstate *ps)
239 {
240 	int nfd, pifds[2];
241 	String *s;
242 
243 	ps->open = 0;
244 	ps->fd = fd;
245 	ps->input = input;
246 	if (fd < 0 || pipe(pifds) < 0)
247 		return -1;
248 	ps->kid = fork();
249 	switch (ps->kid) {
250 	case -1:
251 		return -1;
252 	case 0:
253 		if (input)
254 			dup(pifds[Wr], Stdout);
255 		else
256 			dup(pifds[Rd], Stdin);
257 		close(pifds[input? Rd: Wr]);
258 		dup(fd, (input? Stdin: Stdout));
259 		s = s_new();
260 		if (cmd[0] != '/')
261 			s_append(s, "/bin/");
262 		s_append(s, cmd);
263 		execl(s_to_c(s), cmd, nil);
264 		sysfatal("can't exec %s: %r", cmd);
265 	default:
266 		nfd = pifds[input? Rd: Wr];
267 		close(pifds[input? Wr: Rd]);
268 		break;
269 	}
270 	ps->rfd = nfd;
271 	ps->open = 1;
272 	return nfd;
273 }
274 
275 static char *
276 pushclose(Pushstate *ps)
277 {
278 	Waitmsg *wm;
279 
280 	if (ps->fd < 0 || ps->rfd < 0 || !ps->open)
281 		return "not open";
282 	close(ps->rfd);
283 	ps->rfd = -1;
284 	ps->open = 0;
285 	while ((wm = wait()) != nil && wm->pid != ps->kid)
286 		continue;
287 	return wm? wm->msg: nil;
288 }
289 
290 /*
291  * block-buffer management
292  */
293 
294 static void
295 initblks(void)
296 {
297 	free(tpblk);
298 	tpblk = malloc(Tblock * nblock);
299 	assert(tpblk != nil);
300 	endblk = tpblk + nblock;
301 }
302 
303 /*
304  * (re)fill block buffers from archive.  `justhdr' means we don't care
305  * about the data before the next header block.
 *
 * returns bufs on success, or nil on any call after one that read less
 * than a full archive block (the unread tail of that block is zeroed).
 * EOF and reads that are not a multiple of Tblock are fatal.
 *
 * the first read of a named archive (usefile set, fixednblock clear)
 * is a single read(); its size becomes the blocking factor nblock.
 * when only the next header matters, the archive is seekable, and
 * nexthdr is at least a whole archive block away, the block is skipped
 * with seek() instead of being read; bufs is then left untouched.
306  */
307 static char *
308 refill(int ar, char *bufs, int justhdr)
309 {
310 	int i, n;
311 	unsigned bytes = Tblock * nblock;
312 	static int done, first = 1, seekable;
313 
314 	if (done)
315 		return nil;
316 
317 	blkoff = seek(ar, 0, 1);		/* note position for `tar r' */
318 	if (first)
319 		seekable = blkoff >= 0;
320 	/* try to size non-pipe input at first read */
321 	if (first && usefile && !fixednblock) {
322 		n = eread(arname, ar, bufs, bytes);
323 		if (n == 0)
324 			sysfatal("EOF reading archive %s: %r", arname);
325 		i = n;
326 		if (i % Tblock != 0)
327 			sysfatal("%s: archive block size (%d) error", arname, i);
328 		i /= Tblock;
329 		if (i != nblock) {
330 			nblock = i;
331 			fprint(2, "%s: blocking = %d\n", argv0, nblock);
332 			endblk = (Hdr *)bufs + nblock;
333 			bytes = n;
334 		}
335 	} else if (justhdr && seekable && nexthdr - blkoff >= bytes) {
336 		/* optimisation for huge archive members on seekable media */
337 		if (seek(ar, bytes, 1) < 0)
338 			sysfatal("can't seek on archive %s: %r", arname);
339 		n = bytes;
340 	} else
341 		n = ereadn(arname, ar, bufs, bytes);
342 	first = 0;
343 
344 	if (n == 0)
345 		sysfatal("unexpected EOF reading archive %s", arname);
346 	if (n % Tblock != 0)
347 		sysfatal("partial block read from archive %s", arname);
	/* a short read is taken to be the last archive block */
348 	if (n != bytes) {
349 		done = 1;
350 		memset(bufs + n, 0, bytes - n);
351 	}
352 	return bufs;
353 }
354 
355 static Hdr *
356 getblk(int ar, Refill rfp, int justhdr)
357 {
358 	if (curblk == nil || curblk >= endblk) {  /* input block exhausted? */
359 		if (rfp != nil && (*rfp)(ar, (char *)tpblk, justhdr) == nil)
360 			return nil;
361 		curblk = tpblk;
362 	}
363 	return curblk++;
364 }
365 
366 static Hdr *
367 getblkrd(int ar, int justhdr)
368 {
369 	return getblk(ar, refill, justhdr);
370 }
371 
372 static Hdr *
373 getblke(int ar)
374 {
375 	return getblk(ar, nil, Alldata);
376 }
377 
378 static Hdr *
379 getblkz(int ar)
380 {
381 	Hdr *hp = getblke(ar);
382 
383 	if (hp != nil)
384 		memset(hp->data, 0, Tblock);
385 	return hp;
386 }
387 
388 /*
389  * how many block buffers are available, starting at the address
390  * just returned by getblk*?
391  */
392 static int
393 gothowmany(int max)
394 {
395 	int n = endblk - (curblk - 1);
396 
397 	return n > max? max: n;
398 }
399 
400 /*
401  * indicate that one is done with the last block obtained from getblke
402  * and it is now available to be written into the archive.
403  */
404 static void
405 putlastblk(int ar)
406 {
407 	unsigned bytes = Tblock * nblock;
408 
409 	/* if writing end-of-archive, aid compression (good hygiene too) */
410 	if (curblk < endblk)
411 		memset(curblk, 0, (char *)endblk - (char *)curblk);
412 	ewrite(arname, ar, tpblk, bytes);
413 }
414 
415 static void
416 putblk(int ar)
417 {
418 	if (curblk >= endblk)
419 		putlastblk(ar);
420 }
421 
422 static void
423 putbackblk(int ar)
424 {
425 	curblk--;
426 	USED(ar);
427 }
428 
429 static void
430 putreadblks(int ar, int blks)
431 {
432 	curblk += blks - 1;
433 	USED(ar);
434 }
435 
436 static void
437 putblkmany(int ar, int blks)
438 {
439 	assert(blks > 0);
440 	curblk += blks - 1;
441 	putblk(ar);
442 }
443 
444 /*
445  * common routines
446  */
447 
448 /*
449  * modifies hp->chksum but restores it; important for the last block of the
450  * old archive when updating with `tar rf archive'
451  */
452 static long
453 chksum(Hdr *hp)
454 {
455 	int n = Tblock;
456 	long i = 0;
457 	uchar *cp = hp->data;
458 	char oldsum[sizeof hp->chksum];
459 
460 	memmove(oldsum, hp->chksum, sizeof oldsum);
461 	memset(hp->chksum, ' ', sizeof hp->chksum);
462 	while (n-- > 0)
463 		i += *cp++;
464 	memmove(hp->chksum, oldsum, sizeof oldsum);
465 	return i;
466 }
467 
468 static int
469 isustar(Hdr *hp)
470 {
471 	return strcmp(hp->magic, "ustar") == 0;
472 }
473 
/*
 * length of the string in the n-byte field s, which need not be
 * NUL-terminated: the offset of the first NUL, or n if there is none.
 * no byte outside s[0..n-1] is examined, and the bytes after the first
 * NUL are not trusted to be NUL (header fields come from the archive).
 * returns 0 if n is not positive.
 */
static int
strnlen(char *s, int n)
{
	char *nul;

	if (n <= 0)
		return 0;
	nul = memchr(s, '\0', n);
	if (nul)
		return nul - s;
	return n;
}
484 
485 /* set fullname from header */
486 static char *
487 name(Hdr *hp)
488 {
489 	int pfxlen, namlen;
490 	char *fullname;
491 	static char fullnamebuf[2+Maxname+1];  /* 2+ for ./ on relative names */
492 
493 	fullname = fullnamebuf+2;
494 	namlen = strnlen(hp->name, sizeof hp->name);
495 	if (hp->prefix[0] == '\0' || !isustar(hp)) {	/* old-style name? */
496 		memmove(fullname, hp->name, namlen);
497 		fullname[namlen] = '\0';
498 		return fullname;
499 	}
500 
501 	/* name is in two pieces */
502 	pfxlen = strnlen(hp->prefix, sizeof hp->prefix);
503 	memmove(fullname, hp->prefix, pfxlen);
504 	fullname[pfxlen] = '/';
505 	memmove(fullname + pfxlen + 1, hp->name, namlen);
506 	fullname[pfxlen + 1 + namlen] = '\0';
507 	return fullname;
508 }
509 
510 static int
511 isdir(Hdr *hp)
512 {
513 	/* the mode test is ugly but sometimes necessary */
514 	return hp->linkflag == LF_DIR ||
515 		strrchr(name(hp), '\0')[-1] == '/' ||
516 		(strtoul(hp->mode, nil, 8)&0170000) == 040000;
517 }
518 
519 static int
520 eotar(Hdr *hp)
521 {
522 	return name(hp)[0] == '\0';
523 }
524 
525 /*
526 static uvlong
527 getbe(uchar *src, int size)
528 {
529 	uvlong vl = 0;
530 
531 	while (size-- > 0) {
532 		vl <<= 8;
533 		vl |= *src++;
534 	}
535 	return vl;
536 }
537  */
538 
539 static void
540 putbe(uchar *dest, uvlong vl, int size)
541 {
542 	for (dest += size; size-- > 0; vl >>= 8)
543 		*--dest = vl;
544 }
545 
546 /*
547  * cautious parsing of octal numbers as ascii strings in
548  * a tar header block.  this is particularly important for
549  * trusting the checksum when trying to resync.
550  */
551 static uvlong
552 hdrotoull(char *st, char *end, uvlong errval, char *name, char *field)
553 {
554 	char *numb;
555 
556 	for (numb = st; (*numb == ' ' || *numb == '\0') && numb < end; numb++)
557 		;
558 	if (numb < end && isascii(*numb) && isdigit(*numb))
559 		return strtoull(numb, nil, 8);
560 	else if (numb >= end)
561 		fprint(2, "%s: %s: empty %s in header\n", argv0, name, field);
562 	else
563 		fprint(2, "%s: %s: %s: non-numeric %s in header\n",
564 			argv0, name, numb, field);
565 	return errval;
566 }
567 
568 /*
569  * return the nominal size from the header block, which is not always the
570  * size in the archive (the archive size may be zero for some file types
571  * regardless of the nominal size).
572  *
573  * gnu and freebsd tars are now recording vlongs as big-endian binary
574  * with a flag in byte 0 to indicate this, which permits file sizes up to
575  * 2^64-1 (actually 2^80-1 but our file sizes are vlongs) rather than 2^33-1.
576  */
577 static Off
578 hdrsize(Hdr *hp)
579 {
580 	uchar *p;
581 
582 	if((uchar)hp->size[0] == Binnegsz) {
583 		fprint(2, "%s: %s: negative length, which is insane\n",
584 			argv0, name(hp));
585 		return 0;
586 	} else if((uchar)hp->size[0] == Binsize) {
587 		p = (uchar *)hp->size + sizeof hp->size - 1 -
588 			sizeof(vlong);		/* -1 for terminating space */
589 		return G8BEBYTE(p);
590 	}
591 
592 	return hdrotoull(hp->size, hp->size + sizeof hp->size, 0,
593 		name(hp), "size");
594 }
595 
596 /*
597  * return the number of bytes recorded in the archive.
598  */
599 static Off
600 arsize(Hdr *hp)
601 {
602 	if(isdir(hp) || islink(hp->linkflag))
603 		return 0;
604 	return hdrsize(hp);
605 }
606 
607 static long
608 parsecksum(char *cksum, char *name)
609 {
610 	Hdr *hp;
611 
612 	return hdrotoull(cksum, cksum + sizeof hp->chksum, (uvlong)-1LL,
613 		name, "checksum");
614 }
615 
616 static Hdr *
617 readhdr(int ar)
618 {
619 	long hdrcksum;
620 	Hdr *hp;
621 
622 	hp = getblkrd(ar, Alldata);
623 	if (hp == nil)
624 		sysfatal("unexpected EOF instead of archive header in %s",
625 			arname);
626 	if (eotar(hp))			/* end-of-archive block? */
627 		return nil;
628 
629 	hdrcksum = parsecksum(hp->chksum, name(hp));
630 	if (hdrcksum == -1 || chksum(hp) != hdrcksum) {
631 		if (!resync)
632 			sysfatal("bad archive header checksum in %s: "
633 				"name %.100s...; expected %#luo got %#luo",
634 				arname, hp->name, hdrcksum, chksum(hp));
635 		fprint(2, "%s: skipping past archive header with bad checksum in %s...",
636 			argv0, arname);
637 		do {
638 			hp = getblkrd(ar, Alldata);
639 			if (hp == nil)
640 				sysfatal("unexpected EOF looking for archive header in %s",
641 					arname);
642 			hdrcksum = parsecksum(hp->chksum, name(hp));
643 		} while (hdrcksum == -1 || chksum(hp) != hdrcksum);
644 		fprint(2, "found %s\n", name(hp));
645 	}
646 	nexthdr += Tblock*(1 + BYTES2TBLKS(arsize(hp)));
647 	return hp;
648 }
649 
650 /*
651  * tar r[c]
652  */
653 
654 /*
655  * if name is longer than Namsiz bytes, try to split it at a slash and fit the
656  * pieces into hp->prefix and hp->name.
657  */
658 static int
659 putfullname(Hdr *hp, char *name)
660 {
661 	int namlen, pfxlen;
662 	char *sl, *osl;
663 	String *slname = nil;
664 
665 	if (isdir(hp)) {
666 		slname = s_new();
667 		s_append(slname, name);
668 		s_append(slname, "/");		/* posix requires this */
669 		name = s_to_c(slname);
670 	}
671 
672 	namlen = strlen(name);
673 	if (namlen <= Namsiz) {
674 		strncpy(hp->name, name, Namsiz);
675 		hp->prefix[0] = '\0';		/* ustar paranoia */
676 		return 0;
677 	}
678 
679 	if (!posix || namlen > Maxname) {
680 		fprint(2, "%s: name too long for tar header: %s\n",
681 			argv0, name);
682 		return -1;
683 	}
684 	/*
685 	 * try various splits until one results in pieces that fit into the
686 	 * appropriate fields of the header.  look for slashes from right
687 	 * to left, in the hopes of putting the largest part of the name into
688 	 * hp->prefix, which is larger than hp->name.
689 	 */
690 	sl = strrchr(name, '/');
691 	while (sl != nil) {
692 		pfxlen = sl - name;
693 		if (pfxlen <= sizeof hp->prefix && namlen-1 - pfxlen <= Namsiz)
694 			break;
695 		osl = sl;
696 		*osl = '\0';
697 		sl = strrchr(name, '/');
698 		*osl = '/';
699 	}
700 	if (sl == nil) {
701 		fprint(2, "%s: name can't be split to fit tar header: %s\n",
702 			argv0, name);
703 		return -1;
704 	}
705 	*sl = '\0';
706 	strncpy(hp->prefix, name, sizeof hp->prefix);
707 	*sl++ = '/';
708 	strncpy(hp->name, sl, sizeof hp->name);
709 	if (slname)
710 		s_free(slname);
711 	return 0;
712 }
713 
714 static int
715 mkhdr(Hdr *hp, Dir *dir, char *file)
716 {
717 	/*
718 	 * some of these fields run together, so we format them left-to-right
719 	 * and don't use snprint.
720 	 */
721 	sprint(hp->mode, "%6lo ", dir->mode & 0777);
722 	sprint(hp->uid, "%6o ", aruid);
723 	sprint(hp->gid, "%6o ", argid);
724 	if (dir->length >= (Off)1<<32) {
725 		static int printed;
726 
727 		if (!printed) {
728 			printed = 1;
729 			fprint(2, "%s: storing large sizes in \"base 256\"\n", argv0);
730 		}
731 		hp->size[0] = Binsize;
732 		/* emit so-called `base 256' representation of size */
733 		putbe((uchar *)hp->size+1, dir->length, sizeof hp->size - 2);
734 		hp->size[sizeof hp->size - 1] = ' ';
735 	} else
736 		sprint(hp->size, "%11lluo ", dir->length);
737 	sprint(hp->mtime, "%11luo ", dir->mtime);
738 	hp->linkflag = (dir->mode&DMDIR? LF_DIR: LF_PLAIN1);
739 	putfullname(hp, file);
740 	if (posix) {
741 		strncpy(hp->magic, "ustar", sizeof hp->magic);
742 		strncpy(hp->version, "00", sizeof hp->version);
743 		strncpy(hp->uname, dir->uid, sizeof hp->uname);
744 		strncpy(hp->gname, dir->gid, sizeof hp->gname);
745 	}
746 	sprint(hp->chksum, "%6luo", chksum(hp));
747 	return 0;
748 }
749 
750 static void addtoar(int ar, char *file, char *shortf);
751 
752 static void
753 addtreetoar(int ar, char *file, char *shortf, int fd)
754 {
755 	int n;
756 	Dir *dent, *dirents;
757 	String *name = s_new();
758 
759 	n = dirreadall(fd, &dirents);
760 	if (n < 0)
761 		fprint(2, "%s: dirreadall %s: %r\n", argv0, file);
762 	close(fd);
763 	if (n <= 0)
764 		return;
765 
766 	if (chdir(shortf) < 0)
767 		sysfatal("chdir %s: %r", file);
768 	if (Debug)
769 		fprint(2, "chdir %s\t# %s\n", shortf, file);
770 
771 	for (dent = dirents; dent < dirents + n; dent++) {
772 		s_reset(name);
773 		s_append(name, file);
774 		s_append(name, "/");
775 		s_append(name, dent->name);
776 		addtoar(ar, s_to_c(name), dent->name);
777 	}
778 	s_free(name);
779 	free(dirents);
780 
781 	/*
782 	 * this assumes that shortf is just one component, which is true
783 	 * during directory descent, but not necessarily true of command-line
784 	 * arguments.  Our caller (or addtoar's) must reset the working
785 	 * directory if necessary.
786 	 */
787 	if (chdir("..") < 0)
788 		sysfatal("chdir %s/..: %r", file);
789 	if (Debug)
790 		fprint(2, "chdir ..\n");
791 }
792 
793 static void
794 addtoar(int ar, char *file, char *shortf)
795 {
796 	int n, fd, isdir;
797 	long bytes, blksread;
798 	ulong blksleft;
799 	Hdr *hbp;
800 	Dir *dir;
801 	String *name = nil;
802 
803 	if (shortf[0] == '#') {
804 		name = s_new();
805 		s_append(name, "./");
806 		s_append(name, shortf);
807 		shortf = s_to_c(name);
808 	}
809 
810 	if (Debug)
811 		fprint(2, "opening %s	# %s\n", shortf, file);
812 	fd = open(shortf, OREAD);
813 	if (fd < 0) {
814 		fprint(2, "%s: can't open %s: %r\n", argv0, file);
815 		if (name)
816 			s_free(name);
817 		return;
818 	}
819 	dir = dirfstat(fd);
820 	if (dir == nil)
821 		sysfatal("can't fstat %s: %r", file);
822 
823 	hbp = getblkz(ar);
824 	isdir = (dir->qid.type & QTDIR) != 0;
825 	if (mkhdr(hbp, dir, file) < 0) {
826 		putbackblk(ar);
827 		free(dir);
828 		close(fd);
829 		if (name)
830 			s_free(name);
831 		return;
832 	}
833 	putblk(ar);
834 
835 	blksleft = BYTES2TBLKS(dir->length);
836 	free(dir);
837 
838 	if (isdir)
839 		addtreetoar(ar, file, shortf, fd);
840 	else {
841 		for (; blksleft > 0; blksleft -= blksread) {
842 			hbp = getblke(ar);
843 			blksread = gothowmany(blksleft);
844 			assert(blksread >= 0);
845 			bytes = blksread * Tblock;
846 			n = ereadn(file, fd, hbp->data, bytes);
847 			assert(n >= 0);
848 			/*
849 			 * ignore EOF.  zero any partial block to aid
850 			 * compression and emergency recovery of data.
851 			 */
852 			if (n < Tblock)
853 				memset(hbp->data + n, 0, bytes - n);
854 			putblkmany(ar, blksread);
855 		}
856 		close(fd);
857 		if (verbose)
858 			fprint(2, "%s\n", file);
859 	}
860 	if (name)
861 		s_free(name);
862 }
863 
864 static char *
865 replace(char **argv)
866 {
867 	int i, ar;
868 	ulong blksleft, blksread;
869 	Off bytes;
870 	Hdr *hp;
871 	Compress *comp = nil;
872 	Pushstate ps;
873 
874 	if (usefile && docreate) {
875 		ar = create(usefile, OWRITE, 0666);
876 		if (docompress)
877 			comp = compmethod(usefile);
878 	} else if (usefile)
879 		ar = open(usefile, ORDWR);
880 	else
881 		ar = Stdout;
882 	if (comp)
883 		ar = push(ar, comp->comp, Output, &ps);
884 	if (ar < 0)
885 		sysfatal("can't open archive %s: %r", usefile);
886 
887 	if (usefile && !docreate) {
888 		/* skip quickly to the end */
889 		while ((hp = readhdr(ar)) != nil) {
890 			bytes = arsize(hp);
891 			for (blksleft = BYTES2TBLKS(bytes);
892 			     blksleft > 0 && getblkrd(ar, Justnxthdr) != nil;
893 			     blksleft -= blksread) {
894 				blksread = gothowmany(blksleft);
895 				putreadblks(ar, blksread);
896 			}
897 		}
898 		/*
899 		 * we have just read the end-of-archive Tblock.
900 		 * now seek back over the (big) archive block containing it,
901 		 * and back up curblk ptr over end-of-archive Tblock in memory.
902 		 */
903 		if (seek(ar, blkoff, 0) < 0)
904 			sysfatal("can't seek back over end-of-archive in %s: %r",
905 				arname);
906 		curblk--;
907 	}
908 
909 	for (i = 0; argv[i] != nil; i++) {
910 		addtoar(ar, argv[i], argv[i]);
911 		chdir(origdir);		/* for correctness & profiling */
912 	}
913 
914 	/* write end-of-archive marker */
915 	getblkz(ar);
916 	putblk(ar);
917 	getblkz(ar);
918 	putlastblk(ar);
919 
920 	if (comp)
921 		return pushclose(&ps);
922 	if (ar > Stderr)
923 		close(ar);
924 	return nil;
925 }
926 
927 /*
928  * tar [xt]
929  */
930 
931 /* is pfx a file-name prefix of name? */
932 static int
933 prefix(char *name, char *pfx)
934 {
935 	int pfxlen = strlen(pfx);
936 	char clpfx[Maxname+1];
937 
938 	if (pfxlen > Maxname)
939 		return 0;
940 	strcpy(clpfx, pfx);
941 	cleanname(clpfx);
942 	return strncmp(clpfx, name, pfxlen) == 0 &&
943 		(name[pfxlen] == '\0' || name[pfxlen] == '/');
944 }
945 
946 static int
947 match(char *name, char **argv)
948 {
949 	int i;
950 	char clname[Maxname+1];
951 
952 	if (argv[0] == nil)
953 		return 1;
954 	strcpy(clname, name);
955 	cleanname(clname);
956 	for (i = 0; argv[i] != nil; i++)
957 		if (prefix(clname, argv[i]))
958 			return 1;
959 	return 0;
960 }
961 
962 static void
963 cantcreate(char *s, int mode)
964 {
965 	int len;
966 	static char *last;
967 
968 	/*
969 	 * Always print about files.  Only print about directories
970 	 * we haven't printed about.  (Assumes archive is ordered
971 	 * nicely.)
972 	 */
973 	if(mode&DMDIR){
974 		if(last){
975 			/* already printed this directory */
976 			if(strcmp(s, last) == 0)
977 				return;
978 			/* printed a higher directory, so printed this one */
979 			len = strlen(s);
980 			if(memcmp(s, last, len) == 0 && last[len] == '/')
981 				return;
982 		}
983 		/* save */
984 		free(last);
985 		last = strdup(s);
986 	}
987 	fprint(2, "%s: can't create %s: %r\n", argv0, s);
988 }
989 
990 static int
991 makedir(char *s)
992 {
993 	int f;
994 
995 	if (access(s, AEXIST) == 0)
996 		return -1;
997 	f = create(s, OREAD, DMDIR | 0777);
998 	if (f >= 0)
999 		close(f);
1000 	else
1001 		cantcreate(s, DMDIR);
1002 	return f;
1003 }
1004 
1005 static int
1006 mkpdirs(char *s)
1007 {
1008 	int err;
1009 	char *p;
1010 
1011 	p = s;
1012 	err = 0;
1013 	while (!err && (p = strchr(p+1, '/')) != nil) {
1014 		*p = '\0';
1015 		err = (access(s, AEXIST) < 0 && makedir(s) < 0);
1016 		*p = '/';
1017 	}
1018 	return -err;
1019 }
1020 
1021 /* Call access but preserve the error string. */
1022 static int
1023 xaccess(char *name, int mode)
1024 {
1025 	char err[ERRMAX];
1026 	int rv;
1027 
1028 	err[0] = 0;
1029 	errstr(err, sizeof err);
1030 	rv = access(name, mode);
1031 	errstr(err, sizeof err);
1032 	return rv;
1033 }
1034 
1035 static int
1036 openfname(Hdr *hp, char *fname, int dir, int mode)
1037 {
1038 	int fd;
1039 
1040 	fd = -1;
1041 	cleanname(fname);
1042 	switch (hp->linkflag) {
1043 	case LF_LINK:
1044 	case LF_SYMLINK1:
1045 	case LF_SYMLINK2:
1046 		fprint(2, "%s: can't make (sym)link %s\n",
1047 			argv0, fname);
1048 		break;
1049 	case LF_FIFO:
1050 		fprint(2, "%s: can't make fifo %s\n", argv0, fname);
1051 		break;
1052 	default:
1053 		if (!keepexisting || access(fname, AEXIST) < 0) {
1054 			int rw = (dir? OREAD: OWRITE);
1055 
1056 			fd = create(fname, rw, mode);
1057 			if (fd < 0) {
1058 				mkpdirs(fname);
1059 				fd = create(fname, rw, mode);
1060 			}
1061 			if (fd < 0 && (!dir || xaccess(fname, AEXIST) < 0))
1062 			    	cantcreate(fname, mode);
1063 		}
1064 		if (fd >= 0 && verbose)
1065 			fprint(2, "%s\n", fname);
1066 		break;
1067 	}
1068 	return fd;
1069 }
1070 
1071 /* copy from archive to file system (or nowhere for table-of-contents) */
1072 static void
1073 copyfromar(int ar, int fd, char *fname, ulong blksleft, Off bytes)
1074 {
1075 	int wrbytes;
1076 	ulong blksread;
1077 	Hdr *hbp;
1078 
1079 	if (blksleft == 0 || bytes < 0)
1080 		bytes = 0;
1081 	for (; blksleft > 0; blksleft -= blksread) {
1082 		hbp = getblkrd(ar, (fd >= 0? Alldata: Justnxthdr));
1083 		if (hbp == nil)
1084 			sysfatal("unexpected EOF on archive extracting %s from %s",
1085 				fname, arname);
1086 		blksread = gothowmany(blksleft);
1087 		if (blksread <= 0) {
1088 			fprint(2, "%s: got %ld blocks reading %s!\n",
1089 				argv0, blksread, fname);
1090 			blksread = 0;
1091 		}
1092 		wrbytes = Tblock*blksread;
1093 		assert(bytes >= 0);
1094 		if(wrbytes > bytes)
1095 			wrbytes = bytes;
1096 		assert(wrbytes >= 0);
1097 		if (fd >= 0)
1098 			ewrite(fname, fd, hbp->data, wrbytes);
1099 		putreadblks(ar, blksread);
1100 		bytes -= wrbytes;
1101 		assert(bytes >= 0);
1102 	}
1103 	if (bytes > 0)
1104 		fprint(2, "%s: %lld bytes uncopied at EOF on archive %s; "
1105 			"%s not fully extracted\n", argv0, bytes, arname, fname);
1106 }
1107 
1108 static void
1109 wrmeta(int fd, Hdr *hp, long mtime, int mode)		/* update metadata */
1110 {
1111 	Dir nd;
1112 
1113 	nulldir(&nd);
1114 	nd.mtime = mtime;
1115 	nd.mode = mode;
1116 	dirfwstat(fd, &nd);
1117 	if (isustar(hp)) {
1118 		nulldir(&nd);
1119 		nd.gid = hp->gname;
1120 		dirfwstat(fd, &nd);
1121 		nulldir(&nd);
1122 		nd.uid = hp->uname;
1123 		dirfwstat(fd, &nd);
1124 	}
1125 }
1126 
1127 /*
1128  * copy a file from the archive into the filesystem.
1129  * fname is result of name(), so has two extra bytes at beginning.
1130  */
1131 static void
1132 extract1(int ar, Hdr *hp, char *fname)
1133 {
1134 	int fd = -1, dir = 0;
1135 	long mtime = strtol(hp->mtime, nil, 8);
1136 	ulong mode = strtoul(hp->mode, nil, 8) & 0777;
1137 	Off bytes = hdrsize(hp);		/* for printing */
1138 	ulong blksleft = BYTES2TBLKS(arsize(hp));
1139 
1140 	/* fiddle name, figure out mode and blocks */
1141 	if (isdir(hp)) {
1142 		mode |= DMDIR|0700;
1143 		dir = 1;
1144 	}
1145 	switch (hp->linkflag) {
1146 	case LF_LINK:
1147 	case LF_SYMLINK1:
1148 	case LF_SYMLINK2:
1149 	case LF_FIFO:
1150 		blksleft = 0;
1151 		break;
1152 	}
1153 	if (relative)
1154 		if(fname[0] == '/')
1155 			*--fname = '.';
1156 		else if(fname[0] == '#'){
1157 			*--fname = '/';
1158 			*--fname = '.';
1159 		}
1160 
1161 	if (verb == Xtract)
1162 		fd = openfname(hp, fname, dir, mode);
1163 	else if (verbose) {
1164 		char *cp = ctime(mtime);
1165 
1166 		print("%M %8lld %-12.12s %-4.4s %s\n",
1167 			mode, bytes, cp+4, cp+24, fname);
1168 	} else
1169 		print("%s\n", fname);
1170 
1171 	copyfromar(ar, fd, fname, blksleft, bytes);
1172 
1173 	/* touch up meta data and close */
1174 	if (fd >= 0) {
1175 		/*
1176 		 * directories should be wstated *after* we're done
1177 		 * creating files in them, but we don't do that.
1178 		 */
1179 		if (settime)
1180 			wrmeta(fd, hp, mtime, mode);
1181 		close(fd);
1182 	}
1183 }
1184 
1185 static void
1186 skip(int ar, Hdr *hp, char *fname)
1187 {
1188 	ulong blksleft, blksread;
1189 	Hdr *hbp;
1190 
1191 	for (blksleft = BYTES2TBLKS(arsize(hp)); blksleft > 0;
1192 	     blksleft -= blksread) {
1193 		hbp = getblkrd(ar, Justnxthdr);
1194 		if (hbp == nil)
1195 			sysfatal("unexpected EOF on archive extracting %s from %s",
1196 				fname, arname);
1197 		blksread = gothowmany(blksleft);
1198 		putreadblks(ar, blksread);
1199 	}
1200 }
1201 
1202 static char *
1203 extract(char **argv)
1204 {
1205 	int ar;
1206 	char *longname;
1207 	Hdr *hp;
1208 	Compress *comp = nil;
1209 	Pushstate ps;
1210 
1211 	if (usefile) {
1212 		ar = open(usefile, OREAD);
1213 		comp = compmethod(usefile);
1214 	} else
1215 		ar = Stdin;
1216 	if (comp)
1217 		ar = push(ar, comp->decomp, Input, &ps);
1218 	if (ar < 0)
1219 		sysfatal("can't open archive %s: %r", usefile);
1220 
1221 	while ((hp = readhdr(ar)) != nil) {
1222 		longname = name(hp);
1223 		if (match(longname, argv))
1224 			extract1(ar, hp, longname);
1225 		else
1226 			skip(ar, hp, longname);
1227 	}
1228 
1229 	if (comp)
1230 		return pushclose(&ps);
1231 	if (ar > Stderr)
1232 		close(ar);
1233 	return nil;
1234 }
1235 
/*
 * parse the keyletter argument, allocate the block buffers and run the
 * chosen verb: extract() for t and x, replace() for c and r.  the exit
 * status is whatever the verb returns.  an unknown keyletter or a
 * missing verb produces the usage message.
 */
1236 void
1237 main(int argc, char *argv[])
1238 {
1239 	int errflg = 0;
1240 	char *ret = nil;
1241 
1242 	fmtinstall('M', dirmodefmt);
1243 
1244 	TARGBEGIN {
1245 	case 'c':
1246 		docreate++;
1247 		verb = Replace;
1248 		break;
1249 	case 'f':
1250 		usefile = arname = EARGF(usage());
1251 		break;
1252 	case 'g':
1253 		argid = strtoul(EARGF(usage()), 0, 0);
1254 		break;
1255 	case 'i':
1256 		ignerrs = 1;
1257 		break;
1258 	case 'k':
1259 		keepexisting++;
1260 		break;
1261 	case 'm':	/* compatibility */
1262 		settime = 0;
1263 		break;
1264 	case 'p':
1265 		posix++;
1266 		break;
1267 	case 'P':
1268 		posix = 0;
1269 		break;
1270 	case 'r':
1271 		verb = Replace;
1272 		break;
1273 	case 'R':
1274 		relative = 0;
1275 		break;
1276 	case 's':
1277 		resync++;
1278 		break;
1279 	case 't':
1280 		verb = Toc;
1281 		break;
1282 	case 'T':
1283 		settime++;
1284 		break;
1285 	case 'u':
1286 		aruid = strtoul(EARGF(usage()), 0, 0);
1287 		break;
1288 	case 'v':
1289 		verbose++;
1290 		break;
1291 	case 'x':
1292 		verb = Xtract;
1293 		break;
1294 	case 'z':
1295 		docompress++;
1296 		break;
	/* a dash among the keyletters is accepted and ignored */
1297 	case '-':
1298 		break;
1299 	default:
1300 		fprint(2, "tar: unknown letter %C\n", TARGC());
1301 		errflg++;
1302 		break;
1303 	} TARGEND
1304 
1305 	if (argc < 0 || errflg)
1306 		usage();
1307 
1308 	initblks();
1309 	switch (verb) {
1310 	case Toc:
1311 	case Xtract:
1312 		ret = extract(argv);
1313 		break;
1314 	case Replace:
		/* remembered so replace() can return here after each argument */
1315 		if (getwd(origdir, sizeof origdir) == nil)
1316 			strcpy(origdir, "/tmp");
1317 		ret = replace(argv);
1318 		break;
1319 	default:
1320 		usage();
1321 		break;
1322 	}
1323 	exits(ret);
1324 }
1324 }
1325