xref: /plan9/sys/src/cmd/tar.c (revision 70f8d5d20cdf22bd7afcab3bf516824c1342bf79)
1 /*
2  * tar - `tape archiver', actually usable on any medium.
3  *	POSIX "ustar" compliant when extracting, and by default when creating.
4  *	this tar attempts to read and write multiple Tblock-byte blocks
5  *	at once to and from the filesystem, and does not copy blocks
6  *	around internally.
7  */
8 
9 #include <u.h>
10 #include <libc.h>
11 #include <ctype.h>
12 #include <fcall.h>		/* for %M */
13 #include <String.h>
14 
/*
 * variants of the ARGBEGIN family in libc.h that examine only the
 * first argument for key letters and options.  TARGBEGIN records argv0
 * if unset, steps past the program name and opens a switch on each rune
 * of the next argument; TARGEND closes it and steps past that argument;
 * TARGC() is the rune currently being examined.
 */
#define	TARGBEGIN {\
	if (argv0 == nil)\
		argv0 = *argv;\
	argv++, argc--;\
	if (argv[0] != nil) {\
		char *_args, *_argt;\
		Rune _argc;\
		_argc = 0;\
		_args = argv[0];\
		while(*_args && (_args += chartorune(&_argc, _args)))\
			switch(_argc)
#define	TARGEND	SET(_argt); USED(_argt); USED(_argc); USED(_args); \
	argc--, argv++; } \
	USED(argv); USED(argc); }
#define	TARGC() (_argc)
32 
/* number of size-sized units needed to hold a (rounds up) */
#define HOWMANY(a, size)	(((a) + (size) - 1) / (size))
/* number of Tblock-byte tar blocks needed to hold bytes */
#define BYTES2TBLKS(bytes)	HOWMANY(bytes, Tblock)
35 
/*
 * read big-endian binary integers; args must be (uchar *).
 * G4BEBYTE widens its top byte to u32int before shifting, and G8BEBYTE
 * assembles its result as a uvlong, so that a leading byte of 0x80 or
 * more is never shifted into (or out of) the sign bit of a signed type.
 * G8BEBYTE still yields a vlong.
 */
#define	G2BEBYTE(x)	(((x)[0]<<8)  |  (x)[1])
#define	G3BEBYTE(x)	(((x)[0]<<16) | ((x)[1]<<8)  |  (x)[2])
#define	G4BEBYTE(x)	(((u32int)(x)[0]<<24) | ((x)[1]<<16) | ((x)[2]<<8) | (x)[3])
#define	G8BEBYTE(x)	((vlong)(((uvlong)G4BEBYTE(x)<<32) | G4BEBYTE((x)+4)))
41 
42 typedef vlong Off;
43 typedef char *(*Refill)(int ar, char *bufs, int justhdr);
44 
45 enum { Stdin, Stdout, Stderr };
46 enum { Rd, Wr };			/* pipe fd-array indices */
47 enum { Output, Input };
48 enum { None, Toc, Xtract, Replace };
49 enum { Alldata, Justnxthdr };
50 enum {
51 	Tblock = 512,
52 	Namsiz = 100,
53 	Maxpfx = 155,		/* from POSIX */
54 	Maxname = Namsiz + 1 + Maxpfx,
55 	Binsize = 0x80,		/* flag in size[0], from gnu: positive binary size */
56 	Binnegsz = 0xff,	/* flag in size[0]: negative binary size */
57 
58 	Nblock = 40,		/* maximum blocksize */
59 	Dblock = 20,		/* default blocksize */
60 	Debug = 0,
61 };
62 
/* POSIX link flags, as stored in Hdr.linkflag */
enum {
	LF_PLAIN1	= '\0',
	LF_PLAIN2	= '0',
	LF_LINK		= '1',
	LF_SYMLINK1	= '2',
	LF_SYMLINK2	= 's',		/* 4BSD used this */
	LF_CHR		= '3',
	LF_BLK		= '4',
	LF_DIR		= '5',
	LF_FIFO		= '6',
	LF_CONTIG	= '7',
	/* 'A' - 'Z' are reserved for custom implementations */
};

/* classify a link flag: hard link, symbolic link, or either */
#define isreallink(lf)	((lf) == LF_LINK)
#define issymlink(lf)	((lf) == LF_SYMLINK1 || (lf) == LF_SYMLINK2)
#define islink(lf)	(isreallink(lf) || issymlink(lf))
81 
82 typedef union {
83 	uchar	data[Tblock];
84 	struct {
85 		char	name[Namsiz];
86 		char	mode[8];
87 		char	uid[8];
88 		char	gid[8];
89 		char	size[12];
90 		char	mtime[12];
91 		char	chksum[8];
92 		char	linkflag;
93 		char	linkname[Namsiz];
94 
95 		/* rest are defined by POSIX's ustar format; see p1003.2b */
96 		char	magic[6];	/* "ustar" */
97 		char	version[2];
98 		char	uname[32];
99 		char	gname[32];
100 		char	devmajor[8];
101 		char	devminor[8];
102 		char	prefix[Maxpfx]; /* if non-null, path= prefix "/" name */
103 	};
104 } Hdr;
105 
/* a compression method: filter commands and the suffixes that select it */
typedef struct {
	char	*comp;		/* command that compresses */
	char	*decomp;	/* command that decompresses */
	char	*sfx[4];	/* archive-name suffixes; unused slots are nil */
} Compress;

/* known methods; the first entry is the default used by compmethod */
static Compress comps[] = {
	{ "gzip",	"gunzip",	{ ".tar.gz", ".tgz" } },	/* default */
	{ "compress",	"uncompress",	{ ".tar.Z",  ".tz" } },
	{ "bzip2",	"bunzip2",	{ ".tar.bz", ".tbz",
					  ".tar.bz2",".tbz2" } },
};
118 
/* state of a filter pushed onto a descriptor by push() */
typedef struct {
	int	kid;	/* pid returned by fork */
	int	fd;	/* original fd */
	int	rfd;	/* replacement fd */
	int	input;	/* the `input' argument given to push */
	int	open;	/* non-zero between a successful push and pushclose */
} Pushstate;

/* the opposite pipe fd-array index */
#define OTHER(rdwr) ((rdwr) == Rd? Wr: Rd)
128 
129 static int debug;
130 static int fixednblock;
131 static int verb;
132 static int posix = 1;
133 static int docreate;
134 static int aruid;
135 static int argid;
136 static int relative = 1;
137 static int settime;
138 static int verbose;
139 static int docompress;
140 static int keepexisting;
141 static int ignerrs;		/* flag: ignore i/o errors if possible */
142 static Off blkoff;		/* offset of the current archive block (not Tblock) */
143 static Off nexthdr;
144 
145 static int nblock = Dblock;
146 static int resync;
147 static char *usefile, *arname = "archive";
148 static char origdir[Maxname*2];
149 static Hdr *tpblk, *endblk;
150 static Hdr *curblk;
151 
152 static void
usage(void)153 usage(void)
154 {
155 	fprint(2, "usage: %s {crtx}[PRTfgikmpsuvz] [archive] [file1 file2...]\n",
156 		argv0);
157 	exits("usage");
158 }
159 
160 /* I/O, with error retry or exit */
161 
162 static int
cope(char * name,int fd,void * buf,long len,Off off)163 cope(char *name, int fd, void *buf, long len, Off off)
164 {
165 	fprint(2, "%s: %serror reading %s: %r\n", argv0,
166 		(ignerrs? "ignoring ": ""), name);
167 	if (!ignerrs)
168 		exits("read error");
169 
170 	/* pretend we read len bytes of zeroes */
171 	memset(buf, 0, len);
172 	if (off >= 0)			/* seekable? */
173 		seek(fd, off + len, 0);
174 	return len;
175 }
176 
177 static int
eread(char * name,int fd,void * buf,long len)178 eread(char *name, int fd, void *buf, long len)
179 {
180 	int rd;
181 	Off off;
182 
183 	off = seek(fd, 0, 1);		/* for coping with errors */
184 	rd = read(fd, buf, len);
185 	if (rd < 0)
186 		rd = cope(name, fd, buf, len, off);
187 	return rd;
188 }
189 
190 static int
ereadn(char * name,int fd,void * buf,long len)191 ereadn(char *name, int fd, void *buf, long len)
192 {
193 	int rd;
194 	Off off;
195 
196 	off = seek(fd, 0, 1);
197 	rd = readn(fd, buf, len);
198 	if (rd < 0)
199 		rd = cope(name, fd, buf, len, off);
200 	return rd;
201 }
202 
/*
 * write len bytes from buf to fd (called name); anything other than a
 * complete write is fatal.  the error string is cleared first so that
 * %r reports only an error set by this write.
 */
static int
ewrite(char *name, int fd, void *buf, long len)
{
	int wrote;

	werrstr("");
	if ((wrote = write(fd, buf, len)) != len)
		sysfatal("error writing %s: %r", name);
	return wrote;
}
214 
215 /* compression */
216 
217 static Compress *
compmethod(char * name)218 compmethod(char *name)
219 {
220 	int i, nmlen, sfxlen;
221 	Compress *cp;
222 
223 	if (name != nil) {
224 		nmlen = strlen(name);
225 		for (cp = comps; cp < comps + nelem(comps); cp++)
226 			for (i = 0; i < nelem(cp->sfx) && cp->sfx[i]; i++) {
227 				sfxlen = strlen(cp->sfx[i]);
228 				if (nmlen > sfxlen &&
229 				    strcmp(cp->sfx[i], name+nmlen-sfxlen) == 0)
230 					return cp;
231 			}
232 	}
233 	return docompress? comps: nil;
234 }
235 
236 /*
237  * push a filter, cmd, onto fd.  if input, it's an input descriptor.
238  * returns a descriptor to replace fd, or -1 on error.
239  */
240 static int
push(int fd,char * cmd,int input,Pushstate * ps)241 push(int fd, char *cmd, int input, Pushstate *ps)
242 {
243 	int nfd, pifds[2];
244 	String *s;
245 
246 	ps->open = 0;
247 	ps->fd = fd;
248 	ps->input = input;
249 	if (fd < 0 || pipe(pifds) < 0)
250 		return -1;
251 	ps->kid = fork();
252 	switch (ps->kid) {
253 	case -1:
254 		return -1;
255 	case 0:
256 		if (input)
257 			dup(pifds[Wr], Stdout);
258 		else
259 			dup(pifds[Rd], Stdin);
260 		close(pifds[input? Rd: Wr]);
261 		dup(fd, (input? Stdin: Stdout));
262 		s = s_new();
263 		if (cmd[0] != '/')
264 			s_append(s, "/bin/");
265 		s_append(s, cmd);
266 		execl(s_to_c(s), cmd, nil);
267 		sysfatal("can't exec %s: %r", cmd);
268 	default:
269 		nfd = pifds[input? Rd: Wr];
270 		close(pifds[input? Wr: Rd]);
271 		break;
272 	}
273 	ps->rfd = nfd;
274 	ps->open = 1;
275 	return nfd;
276 }
277 
278 static char *
pushclose(Pushstate * ps)279 pushclose(Pushstate *ps)
280 {
281 	Waitmsg *wm;
282 
283 	if (ps->fd < 0 || ps->rfd < 0 || !ps->open)
284 		return "not open";
285 	close(ps->rfd);
286 	ps->rfd = -1;
287 	ps->open = 0;
288 	while ((wm = wait()) != nil && wm->pid != ps->kid)
289 		continue;
290 	return wm? wm->msg: nil;
291 }
292 
293 /*
294  * block-buffer management
295  */
296 
297 static void
initblks(void)298 initblks(void)
299 {
300 	free(tpblk);
301 	tpblk = malloc(Tblock * nblock);
302 	assert(tpblk != nil);
303 	endblk = tpblk + nblock;
304 }
305 
306 /*
307  * (re)fill block buffers from archive.  `justhdr' means we don't care
308  * about the data before the next header block.
309  */
310 static char *
refill(int ar,char * bufs,int justhdr)311 refill(int ar, char *bufs, int justhdr)
312 {
313 	int i, n;
314 	unsigned bytes = Tblock * nblock;
315 	static int done, first = 1, seekable;
316 
317 	if (done)
318 		return nil;
319 
320 	blkoff = seek(ar, 0, 1);		/* note position for `tar r' */
321 	if (first)
322 		seekable = blkoff >= 0;
323 	/* try to size non-pipe input at first read */
324 	if (first && usefile && !fixednblock) {
325 		n = eread(arname, ar, bufs, bytes);
326 		if (n == 0)
327 			sysfatal("EOF reading archive %s: %r", arname);
328 		i = n;
329 		if (i % Tblock != 0)
330 			sysfatal("%s: archive block size (%d) error", arname, i);
331 		i /= Tblock;
332 		if (i != nblock) {
333 			nblock = i;
334 			fprint(2, "%s: blocking = %d\n", argv0, nblock);
335 			endblk = (Hdr *)bufs + nblock;
336 			bytes = n;
337 		}
338 	} else if (justhdr && seekable && nexthdr - blkoff >= bytes) {
339 		/* optimisation for huge archive members on seekable media */
340 		if (seek(ar, bytes, 1) < 0)
341 			sysfatal("can't seek on archive %s: %r", arname);
342 		n = bytes;
343 	} else
344 		n = ereadn(arname, ar, bufs, bytes);
345 	first = 0;
346 
347 	if (n == 0)
348 		sysfatal("unexpected EOF reading archive %s", arname);
349 	if (n % Tblock != 0)
350 		sysfatal("partial block read from archive %s", arname);
351 	if (n != bytes) {
352 		done = 1;
353 		memset(bufs + n, 0, bytes - n);
354 	}
355 	return bufs;
356 }
357 
358 static Hdr *
getblk(int ar,Refill rfp,int justhdr)359 getblk(int ar, Refill rfp, int justhdr)
360 {
361 	if (curblk == nil || curblk >= endblk) {  /* input block exhausted? */
362 		if (rfp != nil && (*rfp)(ar, (char *)tpblk, justhdr) == nil)
363 			return nil;
364 		curblk = tpblk;
365 	}
366 	return curblk++;
367 }
368 
369 static Hdr *
getblkrd(int ar,int justhdr)370 getblkrd(int ar, int justhdr)
371 {
372 	return getblk(ar, refill, justhdr);
373 }
374 
375 static Hdr *
getblke(int ar)376 getblke(int ar)
377 {
378 	return getblk(ar, nil, Alldata);
379 }
380 
381 static Hdr *
getblkz(int ar)382 getblkz(int ar)
383 {
384 	Hdr *hp = getblke(ar);
385 
386 	if (hp != nil)
387 		memset(hp->data, 0, Tblock);
388 	return hp;
389 }
390 
391 /*
392  * how many block buffers are available, starting at the address
393  * just returned by getblk*?
394  */
395 static int
gothowmany(int max)396 gothowmany(int max)
397 {
398 	int n = endblk - (curblk - 1);
399 
400 	return n > max? max: n;
401 }
402 
403 /*
404  * indicate that one is done with the last block obtained from getblke
405  * and it is now available to be written into the archive.
406  */
407 static void
putlastblk(int ar)408 putlastblk(int ar)
409 {
410 	unsigned bytes = Tblock * nblock;
411 
412 	/* if writing end-of-archive, aid compression (good hygiene too) */
413 	if (curblk < endblk)
414 		memset(curblk, 0, (char *)endblk - (char *)curblk);
415 	ewrite(arname, ar, tpblk, bytes);
416 }
417 
418 static void
putblk(int ar)419 putblk(int ar)
420 {
421 	if (curblk >= endblk)
422 		putlastblk(ar);
423 }
424 
425 static void
putbackblk(int ar)426 putbackblk(int ar)
427 {
428 	curblk--;
429 	USED(ar);
430 }
431 
432 static void
putreadblks(int ar,int blks)433 putreadblks(int ar, int blks)
434 {
435 	curblk += blks - 1;
436 	USED(ar);
437 }
438 
439 static void
putblkmany(int ar,int blks)440 putblkmany(int ar, int blks)
441 {
442 	assert(blks > 0);
443 	curblk += blks - 1;
444 	putblk(ar);
445 }
446 
447 /*
448  * common routines
449  */
450 
451 /*
452  * modifies hp->chksum but restores it; important for the last block of the
453  * old archive when updating with `tar rf archive'
454  */
455 static long
chksum(Hdr * hp)456 chksum(Hdr *hp)
457 {
458 	int n = Tblock;
459 	long i = 0;
460 	uchar *cp = hp->data;
461 	char oldsum[sizeof hp->chksum];
462 
463 	memmove(oldsum, hp->chksum, sizeof oldsum);
464 	memset(hp->chksum, ' ', sizeof hp->chksum);
465 	while (n-- > 0)
466 		i += *cp++;
467 	memmove(hp->chksum, oldsum, sizeof oldsum);
468 	return i;
469 }
470 
471 static int
isustar(Hdr * hp)472 isustar(Hdr *hp)
473 {
474 	return strcmp(hp->magic, "ustar") == 0;
475 }
476 
/*
 * length of a string held in an n-byte field.
 * s is at most n bytes long, but need not be NUL-terminated.
 * if shorter than n bytes, all bytes after the first NUL must also
 * be NUL (only the last byte of the field is examined to decide).
 */
static int
strnlen(char *s, int n)
{
	if (s[n - 1] != '\0')
		return n;		/* the field is full */
	return strlen(s);
}
487 
488 /* set fullname from header */
489 static char *
name(Hdr * hp)490 name(Hdr *hp)
491 {
492 	int pfxlen, namlen;
493 	char *fullname;
494 	static char fullnamebuf[2+Maxname+1];  /* 2+ for ./ on relative names */
495 
496 	fullname = fullnamebuf+2;
497 	namlen = strnlen(hp->name, sizeof hp->name);
498 	if (hp->prefix[0] == '\0' || !isustar(hp)) {	/* old-style name? */
499 		memmove(fullname, hp->name, namlen);
500 		fullname[namlen] = '\0';
501 		return fullname;
502 	}
503 
504 	/* name is in two pieces */
505 	pfxlen = strnlen(hp->prefix, sizeof hp->prefix);
506 	memmove(fullname, hp->prefix, pfxlen);
507 	fullname[pfxlen] = '/';
508 	memmove(fullname + pfxlen + 1, hp->name, namlen);
509 	fullname[pfxlen + 1 + namlen] = '\0';
510 	return fullname;
511 }
512 
513 static int
isdir(Hdr * hp)514 isdir(Hdr *hp)
515 {
516 	/* the mode test is ugly but sometimes necessary */
517 	return hp->linkflag == LF_DIR ||
518 		strrchr(name(hp), '\0')[-1] == '/' ||
519 		(strtoul(hp->mode, nil, 8)&0170000) == 040000;
520 }
521 
522 static int
eotar(Hdr * hp)523 eotar(Hdr *hp)
524 {
525 	return name(hp)[0] == '\0';
526 }
527 
528 /*
529 static uvlong
530 getbe(uchar *src, int size)
531 {
532 	uvlong vl = 0;
533 
534 	while (size-- > 0) {
535 		vl <<= 8;
536 		vl |= *src++;
537 	}
538 	return vl;
539 }
540  */
541 
542 static void
putbe(uchar * dest,uvlong vl,int size)543 putbe(uchar *dest, uvlong vl, int size)
544 {
545 	for (dest += size; size-- > 0; vl >>= 8)
546 		*--dest = vl;
547 }
548 
549 /*
550  * cautious parsing of octal numbers as ascii strings in
551  * a tar header block.  this is particularly important for
552  * trusting the checksum when trying to resync.
553  */
554 static uvlong
hdrotoull(char * st,char * end,uvlong errval,char * name,char * field)555 hdrotoull(char *st, char *end, uvlong errval, char *name, char *field)
556 {
557 	char *numb;
558 
559 	for (numb = st; (*numb == ' ' || *numb == '\0') && numb < end; numb++)
560 		;
561 	if (numb < end && isascii(*numb) && isdigit(*numb))
562 		return strtoull(numb, nil, 8);
563 	else if (numb >= end)
564 		fprint(2, "%s: %s: empty %s in header\n", argv0, name, field);
565 	else
566 		fprint(2, "%s: %s: %s: non-numeric %s in header\n",
567 			argv0, name, numb, field);
568 	return errval;
569 }
570 
571 /*
572  * return the nominal size from the header block, which is not always the
573  * size in the archive (the archive size may be zero for some file types
574  * regardless of the nominal size).
575  *
576  * gnu and freebsd tars are now recording vlongs as big-endian binary
577  * with a flag in byte 0 to indicate this, which permits file sizes up to
578  * 2^64-1 (actually 2^80-1 but our file sizes are vlongs) rather than 2^33-1.
579  */
580 static Off
hdrsize(Hdr * hp)581 hdrsize(Hdr *hp)
582 {
583 	uchar *p;
584 
585 	if((uchar)hp->size[0] == Binnegsz) {
586 		fprint(2, "%s: %s: negative length, which is insane\n",
587 			argv0, name(hp));
588 		return 0;
589 	} else if((uchar)hp->size[0] == Binsize) {
590 		p = (uchar *)hp->size + sizeof hp->size - 1 -
591 			sizeof(vlong);		/* -1 for terminating space */
592 		return G8BEBYTE(p);
593 	}
594 
595 	return hdrotoull(hp->size, hp->size + sizeof hp->size, 0,
596 		name(hp), "size");
597 }
598 
599 /*
600  * return the number of bytes recorded in the archive.
601  */
602 static Off
arsize(Hdr * hp)603 arsize(Hdr *hp)
604 {
605 	if(isdir(hp) || islink(hp->linkflag))
606 		return 0;
607 	return hdrsize(hp);
608 }
609 
610 static long
parsecksum(char * cksum,char * name)611 parsecksum(char *cksum, char *name)
612 {
613 	Hdr *hp;
614 
615 	return hdrotoull(cksum, cksum + sizeof hp->chksum, (uvlong)-1LL,
616 		name, "checksum");
617 }
618 
619 static Hdr *
readhdr(int ar)620 readhdr(int ar)
621 {
622 	long hdrcksum;
623 	Hdr *hp;
624 
625 	hp = getblkrd(ar, Alldata);
626 	if (hp == nil)
627 		sysfatal("unexpected EOF instead of archive header in %s",
628 			arname);
629 	if (eotar(hp))			/* end-of-archive block? */
630 		return nil;
631 
632 	hdrcksum = parsecksum(hp->chksum, name(hp));
633 	if (hdrcksum == -1 || chksum(hp) != hdrcksum) {
634 		if (!resync)
635 			sysfatal("bad archive header checksum in %s: "
636 				"name %.100s...; expected %#luo got %#luo",
637 				arname, hp->name, hdrcksum, chksum(hp));
638 		fprint(2, "%s: skipping past archive header with bad checksum in %s...",
639 			argv0, arname);
640 		do {
641 			hp = getblkrd(ar, Alldata);
642 			if (hp == nil)
643 				sysfatal("unexpected EOF looking for archive header in %s",
644 					arname);
645 			hdrcksum = parsecksum(hp->chksum, name(hp));
646 		} while (hdrcksum == -1 || chksum(hp) != hdrcksum);
647 		fprint(2, "found %s\n", name(hp));
648 	}
649 	nexthdr += Tblock*(1 + BYTES2TBLKS(arsize(hp)));
650 	return hp;
651 }
652 
653 /*
654  * tar r[c]
655  */
656 
657 /*
658  * if name is longer than Namsiz bytes, try to split it at a slash and fit the
659  * pieces into hp->prefix and hp->name.
660  */
661 static int
putfullname(Hdr * hp,char * name)662 putfullname(Hdr *hp, char *name)
663 {
664 	int namlen, pfxlen;
665 	char *sl, *osl;
666 	String *slname = nil;
667 
668 	if (isdir(hp)) {
669 		slname = s_new();
670 		s_append(slname, name);
671 		s_append(slname, "/");		/* posix requires this */
672 		name = s_to_c(slname);
673 	}
674 
675 	namlen = strlen(name);
676 	if (namlen <= Namsiz) {
677 		strncpy(hp->name, name, Namsiz);
678 		hp->prefix[0] = '\0';		/* ustar paranoia */
679 		return 0;
680 	}
681 
682 	if (!posix || namlen > Maxname) {
683 		fprint(2, "%s: name too long for tar header: %s\n",
684 			argv0, name);
685 		return -1;
686 	}
687 	/*
688 	 * try various splits until one results in pieces that fit into the
689 	 * appropriate fields of the header.  look for slashes from right
690 	 * to left, in the hopes of putting the largest part of the name into
691 	 * hp->prefix, which is larger than hp->name.
692 	 */
693 	sl = strrchr(name, '/');
694 	while (sl != nil) {
695 		pfxlen = sl - name;
696 		if (pfxlen <= sizeof hp->prefix && namlen-1 - pfxlen <= Namsiz)
697 			break;
698 		osl = sl;
699 		*osl = '\0';
700 		sl = strrchr(name, '/');
701 		*osl = '/';
702 	}
703 	if (sl == nil) {
704 		fprint(2, "%s: name can't be split to fit tar header: %s\n",
705 			argv0, name);
706 		return -1;
707 	}
708 	*sl = '\0';
709 	strncpy(hp->prefix, name, sizeof hp->prefix);
710 	*sl++ = '/';
711 	strncpy(hp->name, sl, sizeof hp->name);
712 	if (slname)
713 		s_free(slname);
714 	return 0;
715 }
716 
717 static int
mkhdr(Hdr * hp,Dir * dir,char * file)718 mkhdr(Hdr *hp, Dir *dir, char *file)
719 {
720 	int r;
721 
722 	/*
723 	 * some of these fields run together, so we format them left-to-right
724 	 * and don't use snprint.
725 	 */
726 	sprint(hp->mode, "%6lo ", dir->mode & 0777);
727 	sprint(hp->uid, "%6o ", aruid);
728 	sprint(hp->gid, "%6o ", argid);
729 	if (dir->length >= (Off)1<<32) {
730 		static int printed;
731 
732 		if (!printed) {
733 			printed = 1;
734 			fprint(2, "%s: storing large sizes in \"base 256\"\n", argv0);
735 		}
736 		hp->size[0] = Binsize;
737 		/* emit so-called `base 256' representation of size */
738 		putbe((uchar *)hp->size+1, dir->length, sizeof hp->size - 2);
739 		hp->size[sizeof hp->size - 1] = ' ';
740 	} else
741 		sprint(hp->size, "%11lluo ", dir->length);
742 	sprint(hp->mtime, "%11luo ", dir->mtime);
743 	hp->linkflag = (dir->mode&DMDIR? LF_DIR: LF_PLAIN1);
744 	r = putfullname(hp, file);
745 	if (posix) {
746 		strncpy(hp->magic, "ustar", sizeof hp->magic);
747 		strncpy(hp->version, "00", sizeof hp->version);
748 		strncpy(hp->uname, dir->uid, sizeof hp->uname);
749 		strncpy(hp->gname, dir->gid, sizeof hp->gname);
750 	}
751 	sprint(hp->chksum, "%6luo", chksum(hp));
752 	return r;
753 }
754 
755 static void addtoar(int ar, char *file, char *shortf);
756 
757 static void
addtreetoar(int ar,char * file,char * shortf,int fd)758 addtreetoar(int ar, char *file, char *shortf, int fd)
759 {
760 	int n;
761 	Dir *dent, *dirents;
762 	String *name = s_new();
763 
764 	n = dirreadall(fd, &dirents);
765 	if (n < 0)
766 		fprint(2, "%s: dirreadall %s: %r\n", argv0, file);
767 	close(fd);
768 	if (n <= 0)
769 		return;
770 
771 	if (chdir(shortf) < 0)
772 		sysfatal("chdir %s: %r", file);
773 	if (Debug)
774 		fprint(2, "chdir %s\t# %s\n", shortf, file);
775 
776 	for (dent = dirents; dent < dirents + n; dent++) {
777 		s_reset(name);
778 		s_append(name, file);
779 		s_append(name, "/");
780 		s_append(name, dent->name);
781 		addtoar(ar, s_to_c(name), dent->name);
782 	}
783 	s_free(name);
784 	free(dirents);
785 
786 	/*
787 	 * this assumes that shortf is just one component, which is true
788 	 * during directory descent, but not necessarily true of command-line
789 	 * arguments.  Our caller (or addtoar's) must reset the working
790 	 * directory if necessary.
791 	 */
792 	if (chdir("..") < 0)
793 		sysfatal("chdir %s/..: %r", file);
794 	if (Debug)
795 		fprint(2, "chdir ..\n");
796 }
797 
798 static void
addtoar(int ar,char * file,char * shortf)799 addtoar(int ar, char *file, char *shortf)
800 {
801 	int n, fd, isdir;
802 	long bytes, blksread;
803 	ulong blksleft;
804 	Hdr *hbp;
805 	Dir *dir;
806 	String *name = nil;
807 
808 	if (shortf[0] == '#') {
809 		name = s_new();
810 		s_append(name, "./");
811 		s_append(name, shortf);
812 		shortf = s_to_c(name);
813 	}
814 
815 	if (Debug)
816 		fprint(2, "opening %s	# %s\n", shortf, file);
817 	fd = open(shortf, OREAD);
818 	if (fd < 0) {
819 		fprint(2, "%s: can't open %s: %r\n", argv0, file);
820 		if (name)
821 			s_free(name);
822 		return;
823 	}
824 	dir = dirfstat(fd);
825 	if (dir == nil)
826 		sysfatal("can't fstat %s: %r", file);
827 
828 	hbp = getblkz(ar);
829 	isdir = (dir->qid.type & QTDIR) != 0;
830 	if (mkhdr(hbp, dir, file) < 0) {
831 		putbackblk(ar);
832 		free(dir);
833 		close(fd);
834 		if (name)
835 			s_free(name);
836 		return;
837 	}
838 	putblk(ar);
839 
840 	blksleft = BYTES2TBLKS(dir->length);
841 	free(dir);
842 
843 	if (isdir)
844 		addtreetoar(ar, file, shortf, fd);
845 	else {
846 		for (; blksleft > 0; blksleft -= blksread) {
847 			hbp = getblke(ar);
848 			blksread = gothowmany(blksleft);
849 			assert(blksread >= 0);
850 			bytes = blksread * Tblock;
851 			n = ereadn(file, fd, hbp->data, bytes);
852 			assert(n >= 0);
853 			/*
854 			 * ignore EOF.  zero any partial block to aid
855 			 * compression and emergency recovery of data.
856 			 */
857 			if (n < Tblock)
858 				memset(hbp->data + n, 0, bytes - n);
859 			putblkmany(ar, blksread);
860 		}
861 		close(fd);
862 		if (verbose)
863 			fprint(2, "%s\n", file);
864 	}
865 	if (name)
866 		s_free(name);
867 }
868 
869 static void
skip(int ar,Hdr * hp,char * msg)870 skip(int ar, Hdr *hp, char *msg)
871 {
872 	ulong blksleft, blksread;
873 	Off bytes;
874 
875 	bytes = arsize(hp);
876 	for (blksleft = BYTES2TBLKS(bytes); blksleft > 0; blksleft -= blksread) {
877 		if (getblkrd(ar, Justnxthdr) == nil)
878 			sysfatal("unexpected EOF on archive %s %s", arname, msg);
879 		blksread = gothowmany(blksleft);
880 		putreadblks(ar, blksread);
881 	}
882 }
883 
884 static void
skiptoend(int ar)885 skiptoend(int ar)
886 {
887 	Hdr *hp;
888 
889 	while ((hp = readhdr(ar)) != nil)
890 		skip(ar, hp, "skipping to end");
891 
892 	/*
893 	 * we have just read the end-of-archive Tblock.
894 	 * now seek back over the (big) archive block containing it,
895 	 * and back up curblk ptr over end-of-archive Tblock in memory.
896 	 */
897 	if (seek(ar, blkoff, 0) < 0)
898 		sysfatal("can't seek back over end-of-archive in %s: %r", arname);
899 	curblk--;
900 }
901 
902 static char *
replace(char ** argv)903 replace(char **argv)
904 {
905 	int i, ar;
906 	char *arg;
907 	Compress *comp = nil;
908 	Pushstate ps;
909 
910 	/* open archive to be updated */
911 	if (usefile && docreate)
912 		ar = create(usefile, OWRITE, 0666);
913 	else if (usefile) {
914 		if (docompress)
915 			sysfatal("cannot update compressed archive");
916 		ar = open(usefile, ORDWR);
917 	} else
918 		ar = Stdout;
919 
920 	/* push compression filter, if requested */
921 	if (docompress) {
922 		comp = compmethod(usefile);
923 		if (comp)
924 			ar = push(ar, comp->comp, Output, &ps);
925 	}
926 	if (ar < 0)
927 		sysfatal("can't open archive %s: %r", usefile);
928 
929 	if (usefile && !docreate)
930 		skiptoend(ar);
931 
932 	for (i = 0; argv[i] != nil; i++) {
933 		arg = argv[i];
934 		cleanname(arg);
935 		if (strcmp(arg, "..") == 0 || strncmp(arg, "../", 3) == 0)
936 			fprint(2, "%s: name starting with .. is a bad idea\n",
937 				argv0);
938 		addtoar(ar, arg, arg);
939 		chdir(origdir);		/* for correctness & profiling */
940 	}
941 
942 	/* write end-of-archive marker */
943 	getblkz(ar);
944 	putblk(ar);
945 	getblkz(ar);
946 	putlastblk(ar);
947 
948 	if (comp)
949 		return pushclose(&ps);
950 	if (ar > Stderr)
951 		close(ar);
952 	return nil;
953 }
954 
955 /*
956  * tar [xt]
957  */
958 
959 /* is pfx a file-name prefix of name? */
960 static int
prefix(char * name,char * pfx)961 prefix(char *name, char *pfx)
962 {
963 	int pfxlen = strlen(pfx);
964 	char clpfx[Maxname+1];
965 
966 	if (pfxlen > Maxname)
967 		return 0;
968 	strcpy(clpfx, pfx);
969 	cleanname(clpfx);
970 	return strncmp(clpfx, name, pfxlen) == 0 &&
971 		(name[pfxlen] == '\0' || name[pfxlen] == '/');
972 }
973 
974 static int
match(char * name,char ** argv)975 match(char *name, char **argv)
976 {
977 	int i;
978 	char clname[Maxname+1];
979 
980 	if (argv[0] == nil)
981 		return 1;
982 	strcpy(clname, name);
983 	cleanname(clname);
984 	for (i = 0; argv[i] != nil; i++)
985 		if (prefix(clname, argv[i]))
986 			return 1;
987 	return 0;
988 }
989 
990 static void
cantcreate(char * s,int mode)991 cantcreate(char *s, int mode)
992 {
993 	int len;
994 	static char *last;
995 
996 	/*
997 	 * Always print about files.  Only print about directories
998 	 * we haven't printed about.  (Assumes archive is ordered
999 	 * nicely.)
1000 	 */
1001 	if(mode&DMDIR){
1002 		if(last){
1003 			/* already printed this directory */
1004 			if(strcmp(s, last) == 0)
1005 				return;
1006 			/* printed a higher directory, so printed this one */
1007 			len = strlen(s);
1008 			if(memcmp(s, last, len) == 0 && last[len] == '/')
1009 				return;
1010 		}
1011 		/* save */
1012 		free(last);
1013 		last = strdup(s);
1014 	}
1015 	fprint(2, "%s: can't create %s: %r\n", argv0, s);
1016 }
1017 
1018 static int
makedir(char * s)1019 makedir(char *s)
1020 {
1021 	int f;
1022 
1023 	if (access(s, AEXIST) == 0)
1024 		return -1;
1025 	f = create(s, OREAD, DMDIR | 0777);
1026 	if (f >= 0)
1027 		close(f);
1028 	else
1029 		cantcreate(s, DMDIR);
1030 	return f;
1031 }
1032 
1033 static int
mkpdirs(char * s)1034 mkpdirs(char *s)
1035 {
1036 	int err;
1037 	char *p;
1038 
1039 	p = s;
1040 	err = 0;
1041 	while (!err && (p = strchr(p+1, '/')) != nil) {
1042 		*p = '\0';
1043 		err = (access(s, AEXIST) < 0 && makedir(s) < 0);
1044 		*p = '/';
1045 	}
1046 	return -err;
1047 }
1048 
1049 /* Call access but preserve the error string. */
1050 static int
xaccess(char * name,int mode)1051 xaccess(char *name, int mode)
1052 {
1053 	char err[ERRMAX];
1054 	int rv;
1055 
1056 	err[0] = 0;
1057 	errstr(err, sizeof err);
1058 	rv = access(name, mode);
1059 	errstr(err, sizeof err);
1060 	return rv;
1061 }
1062 
1063 static int
openfname(Hdr * hp,char * fname,int dir,int mode)1064 openfname(Hdr *hp, char *fname, int dir, int mode)
1065 {
1066 	int fd;
1067 
1068 	fd = -1;
1069 	cleanname(fname);
1070 	switch (hp->linkflag) {
1071 	case LF_LINK:
1072 	case LF_SYMLINK1:
1073 	case LF_SYMLINK2:
1074 		fprint(2, "%s: can't make (sym)link %s\n",
1075 			argv0, fname);
1076 		break;
1077 	case LF_FIFO:
1078 		fprint(2, "%s: can't make fifo %s\n", argv0, fname);
1079 		break;
1080 	default:
1081 		if (!keepexisting || access(fname, AEXIST) < 0) {
1082 			int rw = (dir? OREAD: OWRITE);
1083 
1084 			fd = create(fname, rw, mode);
1085 			if (fd < 0) {
1086 				mkpdirs(fname);
1087 				fd = create(fname, rw, mode);
1088 			}
1089 			if (fd < 0 && (!dir || xaccess(fname, AEXIST) < 0))
1090 			    	cantcreate(fname, mode);
1091 		}
1092 		if (fd >= 0 && verbose)
1093 			fprint(2, "%s\n", fname);
1094 		break;
1095 	}
1096 	return fd;
1097 }
1098 
1099 /* copy from archive to file system (or nowhere for table-of-contents) */
1100 static void
copyfromar(int ar,int fd,char * fname,ulong blksleft,Off bytes)1101 copyfromar(int ar, int fd, char *fname, ulong blksleft, Off bytes)
1102 {
1103 	int wrbytes;
1104 	ulong blksread;
1105 	Hdr *hbp;
1106 
1107 	if (blksleft == 0 || bytes < 0)
1108 		bytes = 0;
1109 	for (; blksleft > 0; blksleft -= blksread) {
1110 		hbp = getblkrd(ar, (fd >= 0? Alldata: Justnxthdr));
1111 		if (hbp == nil)
1112 			sysfatal("unexpected EOF on archive extracting %s from %s",
1113 				fname, arname);
1114 		blksread = gothowmany(blksleft);
1115 		if (blksread <= 0) {
1116 			fprint(2, "%s: got %ld blocks reading %s!\n",
1117 				argv0, blksread, fname);
1118 			blksread = 0;
1119 		}
1120 		wrbytes = Tblock*blksread;
1121 		assert(bytes >= 0);
1122 		if(wrbytes > bytes)
1123 			wrbytes = bytes;
1124 		assert(wrbytes >= 0);
1125 		if (fd >= 0)
1126 			ewrite(fname, fd, hbp->data, wrbytes);
1127 		putreadblks(ar, blksread);
1128 		bytes -= wrbytes;
1129 		assert(bytes >= 0);
1130 	}
1131 	if (bytes > 0)
1132 		fprint(2, "%s: %lld bytes uncopied at EOF on archive %s; "
1133 			"%s not fully extracted\n", argv0, bytes, arname, fname);
1134 }
1135 
1136 static void
wrmeta(int fd,Hdr * hp,long mtime,int mode)1137 wrmeta(int fd, Hdr *hp, long mtime, int mode)		/* update metadata */
1138 {
1139 	Dir nd;
1140 
1141 	nulldir(&nd);
1142 	nd.mtime = mtime;
1143 	nd.mode = mode;
1144 	dirfwstat(fd, &nd);
1145 	if (isustar(hp)) {
1146 		nulldir(&nd);
1147 		nd.gid = hp->gname;
1148 		dirfwstat(fd, &nd);
1149 		nulldir(&nd);
1150 		nd.uid = hp->uname;
1151 		dirfwstat(fd, &nd);
1152 	}
1153 }
1154 
1155 /*
1156  * copy a file from the archive into the filesystem.
1157  * fname is result of name(), so has two extra bytes at beginning.
1158  */
1159 static void
extract1(int ar,Hdr * hp,char * fname)1160 extract1(int ar, Hdr *hp, char *fname)
1161 {
1162 	int fd = -1, dir = 0;
1163 	long mtime = strtol(hp->mtime, nil, 8);
1164 	ulong mode = strtoul(hp->mode, nil, 8) & 0777;
1165 	Off bytes = hdrsize(hp);		/* for printing */
1166 	ulong blksleft = BYTES2TBLKS(arsize(hp));
1167 
1168 	/* fiddle name, figure out mode and blocks */
1169 	if (isdir(hp)) {
1170 		mode |= DMDIR|0700;
1171 		dir = 1;
1172 	}
1173 	switch (hp->linkflag) {
1174 	case LF_LINK:
1175 	case LF_SYMLINK1:
1176 	case LF_SYMLINK2:
1177 	case LF_FIFO:
1178 		blksleft = 0;
1179 		break;
1180 	}
1181 	if (relative)
1182 		if(fname[0] == '/')
1183 			*--fname = '.';
1184 		else if(fname[0] == '#'){
1185 			*--fname = '/';
1186 			*--fname = '.';
1187 		}
1188 
1189 	if (verb == Xtract)
1190 		fd = openfname(hp, fname, dir, mode);
1191 	else if (verbose) {
1192 		char *cp = ctime(mtime);
1193 
1194 		print("%M %8lld %-12.12s %-4.4s %s\n",
1195 			mode, bytes, cp+4, cp+24, fname);
1196 	} else
1197 		print("%s\n", fname);
1198 
1199 	copyfromar(ar, fd, fname, blksleft, bytes);
1200 
1201 	/* touch up meta data and close */
1202 	if (fd >= 0) {
1203 		/*
1204 		 * directories should be wstated *after* we're done
1205 		 * creating files in them, but we don't do that.
1206 		 */
1207 		if (settime)
1208 			wrmeta(fd, hp, mtime, mode);
1209 		close(fd);
1210 	}
1211 }
1212 
1213 static char *
extract(char ** argv)1214 extract(char **argv)
1215 {
1216 	int ar;
1217 	char *longname;
1218 	char msg[Maxname + 40];
1219 	Compress *comp;
1220 	Hdr *hp;
1221 	Pushstate ps;
1222 
1223 	/* open archive to be read */
1224 	if (usefile)
1225 		ar = open(usefile, OREAD);
1226 	else
1227 		ar = Stdin;
1228 
1229 	/* push decompression filter if requested or extension is known */
1230 	comp = compmethod(usefile);
1231 	if (comp)
1232 		ar = push(ar, comp->decomp, Input, &ps);
1233 	if (ar < 0)
1234 		sysfatal("can't open archive %s: %r", usefile);
1235 
1236 	while ((hp = readhdr(ar)) != nil) {
1237 		longname = name(hp);
1238 		if (match(longname, argv))
1239 			extract1(ar, hp, longname);
1240 		else {
1241 			snprint(msg, sizeof msg, "extracting %s", longname);
1242 			skip(ar, hp, msg);
1243 		}
1244 	}
1245 
1246 	if (comp)
1247 		return pushclose(&ps);
1248 	if (ar > Stderr)
1249 		close(ar);
1250 	return nil;
1251 }
1252 
1253 void
main(int argc,char * argv[])1254 main(int argc, char *argv[])
1255 {
1256 	int errflg = 0;
1257 	char *ret = nil;
1258 
1259 	fmtinstall('M', dirmodefmt);
1260 
1261 	TARGBEGIN {
1262 	case 'c':
1263 		docreate++;
1264 		verb = Replace;
1265 		break;
1266 	case 'f':
1267 		usefile = arname = EARGF(usage());
1268 		break;
1269 	case 'g':
1270 		argid = strtoul(EARGF(usage()), 0, 0);
1271 		break;
1272 	case 'i':
1273 		ignerrs = 1;
1274 		break;
1275 	case 'k':
1276 		keepexisting++;
1277 		break;
1278 	case 'm':	/* compatibility */
1279 		settime = 0;
1280 		break;
1281 	case 'p':
1282 		posix++;
1283 		break;
1284 	case 'P':
1285 		posix = 0;
1286 		break;
1287 	case 'r':
1288 		verb = Replace;
1289 		break;
1290 	case 'R':
1291 		relative = 0;
1292 		break;
1293 	case 's':
1294 		resync++;
1295 		break;
1296 	case 't':
1297 		verb = Toc;
1298 		break;
1299 	case 'T':
1300 		settime++;
1301 		break;
1302 	case 'u':
1303 		aruid = strtoul(EARGF(usage()), 0, 0);
1304 		break;
1305 	case 'v':
1306 		verbose++;
1307 		break;
1308 	case 'x':
1309 		verb = Xtract;
1310 		break;
1311 	case 'z':
1312 		docompress++;
1313 		break;
1314 	case '-':
1315 		break;
1316 	default:
1317 		fprint(2, "tar: unknown letter %C\n", TARGC());
1318 		errflg++;
1319 		break;
1320 	} TARGEND
1321 
1322 	if (argc < 0 || errflg)
1323 		usage();
1324 
1325 	initblks();
1326 	switch (verb) {
1327 	case Toc:
1328 	case Xtract:
1329 		ret = extract(argv);
1330 		break;
1331 	case Replace:
1332 		if (getwd(origdir, sizeof origdir) == nil)
1333 			strcpy(origdir, "/tmp");
1334 		ret = replace(argv);
1335 		break;
1336 	default:
1337 		usage();
1338 		break;
1339 	}
1340 	exits(ret);
1341 }
1342