xref: /netbsd-src/bin/pax/tar.c (revision 3b01aba77a7a698587faaae455bbfe740923c1f5)
1 /*	$NetBSD: tar.c,v 1.18 2000/02/17 03:12:26 itohy Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992 Keith Muller.
5  * Copyright (c) 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Keith Muller of the University of California, San Diego.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #include <sys/cdefs.h>
41 #ifndef lint
42 #if 0
43 static char sccsid[] = "@(#)tar.c	8.2 (Berkeley) 4/18/94";
44 #else
45 __RCSID("$NetBSD: tar.c,v 1.18 2000/02/17 03:12:26 itohy Exp $");
46 #endif
47 #endif /* not lint */
48 
49 #include <sys/types.h>
50 #include <sys/time.h>
51 #include <sys/stat.h>
52 #include <sys/param.h>
53 
54 #include <ctype.h>
55 #include <errno.h>
56 #include <grp.h>
57 #include <pwd.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <unistd.h>
62 
63 #include "pax.h"
64 #include "extern.h"
65 #include "tar.h"
66 
/*
 * Routines for reading, writing and identifying the headers of the various
 * versions of tar
 */
70 
/* checksum of a header block, counting the checksum field as blanks */
static u_long tar_chksm __P((char *, int));
/* find where a path must be split into ustar prefix and name fields */
static char *name_split __P((char *, int));
/* store an unsigned long as a zero padded octal header field */
static int ul_oct __P((u_long, char *, int, int));
#ifndef NET2_STAT
/* same as ul_oct() but for u_quad_t values (used for file sizes) */
static int uqd_oct __P((u_quad_t, char *, int, int));
#endif
77 
78 /*
79  * Routines common to all versions of tar
80  */
81 
/* set by tar_opt(), tested by tar_wr() */
static int tar_nodir;			/* do not write dirs under old tar */
/* set to 1 by ustar_id() when the magic field reads "ustar  " */
int is_oldgnutar;			/* skip end-of-volume checks */
/* when non-NULL, tar_rd() uses it as the entry name and then frees it */
char *gnu_hack_string;			/* ././@LongLink hackery */
85 
86 /*
87  * tar_endwr()
88  *	add the tar trailer of two null blocks
89  * Return:
90  *	0 if ok, -1 otherwise (what wr_skip returns)
91  */
92 
93 #if __STDC__
94 int
95 tar_endwr(void)
96 #else
97 int
98 tar_endwr()
99 #endif
100 {
101 	return(wr_skip((off_t)(NULLCNT*BLKMULT)));
102 }
103 
104 /*
105  * tar_endrd()
106  *	no cleanup needed here, just return size of trailer (for append)
107  * Return:
108  *	size of trailer (2 * BLKMULT)
109  */
110 
111 #if __STDC__
112 off_t
113 tar_endrd(void)
114 #else
115 off_t
116 tar_endrd()
117 #endif
118 {
119 	return((off_t)(NULLCNT*BLKMULT));
120 }
121 
122 /*
123  * tar_trail()
124  *	Called to determine if a header block is a valid trailer. We are passed
125  *	the block, the in_sync flag (which tells us we are in resync mode;
126  *	looking for a valid header), and cnt (which starts at zero) which is
127  *	used to count the number of empty blocks we have seen so far.
128  * Return:
129  *	0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
130  *	could never contain a header.
131  */
132 
133 #if __STDC__
134 int
135 tar_trail(char *buf, int in_resync, int *cnt)
136 #else
137 int
138 tar_trail(buf, in_resync, cnt)
139 	char *buf;
140 	int in_resync;
141 	int *cnt;
142 #endif
143 {
144 	int i;
145 
146 	/*
147 	 * look for all zero, trailer is two consecutive blocks of zero
148 	 */
149 	for (i = 0; i < BLKMULT; ++i) {
150 		if (buf[i] != '\0')
151 			break;
152 	}
153 
154 	/*
155 	 * if not all zero it is not a trailer, but MIGHT be a header.
156 	 */
157 	if (i != BLKMULT)
158 		return(-1);
159 
160 	/*
161 	 * When given a zero block, we must be careful!
162 	 * If we are not in resync mode, check for the trailer. Have to watch
163 	 * out that we do not mis-identify file data as the trailer, so we do
164 	 * NOT try to id a trailer during resync mode. During resync mode we
165 	 * might as well throw this block out since a valid header can NEVER be
166 	 * a block of all 0 (we must have a valid file name).
167 	 */
168 	if (!in_resync && (++*cnt >= NULLCNT))
169 		return(0);
170 	return(1);
171 }
172 
/*
 * ul_oct()
 *	convert an unsigned long to an octal string. many oddball field
 *	termination characters are used by the various versions of tar in the
 *	different fields. term selects which kind to use:
 *		0 (and any unknown value)	"<digits> \0"
 *		1				"<digits> "
 *		2				"<digits>\0 "
 *		3				"<digits>\0"
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 *	A field too short to even hold the terminator is rejected without
 *	touching str (nothing is ever stored in front of str).
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

#if __STDC__
static int
ul_oct(u_long val, char *str, int len, int term)
#else
static int
ul_oct(val, str, len, term)
	u_long val;
	char *str;
	int len;
	int term;
#endif
{
	int pos;	/* str[0 .. pos-1] is the part not yet written */
	int tlen;	/* bytes taken by the terminator */

	/*
	 * refuse fields that cannot hold the terminator, storing it would
	 * write in front of str
	 */
	tlen = ((term == 1) || (term == 3)) ? 1 : 2;
	if (len < tlen)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	pos = len;
	switch(term) {
	case 3:
		str[--pos] = '\0';
		break;
	case 2:
		str[--pos] = ' ';
		str[--pos] = '\0';
		break;
	case 1:
		str[--pos] = ' ';
		break;
	case 0:
	default:
		str[--pos] = '\0';
		str[--pos] = ' ';
		break;
	}

	/*
	 * convert from the least significant digit backwards while there is
	 * space. An index is used (not a pointer) so we never step to an
	 * address in front of str.
	 */
	while (pos > 0) {
		str[--pos] = '0' + (char)(val & 0x7);
		if ((val = val >> 3) == (u_long)0)
			break;
	}

	/*
	 * '0' pad whatever is left at the front of the field
	 */
	while (pos > 0)
		str[--pos] = '0';

	/*
	 * digits left over means the value did not fit
	 */
	if (val != (u_long)0)
		return(-1);
	return(0);
}
235 
#ifndef NET2_STAT
/*
 * uqd_oct()
 *	convert an u_quad_t to an octal string. one of many oddball field
 *	termination characters are used by the various versions of tar in the
 *	different fields. term selects which kind to use:
 *		0 (and any unknown value)	"<digits> \0"
 *		1				"<digits> "
 *		2				"<digits>\0 "
 *		3				"<digits>\0"
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 *	A field too short to even hold the terminator is rejected without
 *	touching str (nothing is ever stored in front of str).
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

#if __STDC__
static int
uqd_oct(u_quad_t val, char *str, int len, int term)
#else
static int
uqd_oct(val, str, len, term)
	u_quad_t val;
	char *str;
	int len;
	int term;
#endif
{
	int pos;	/* str[0 .. pos-1] is the part not yet written */
	int tlen;	/* bytes taken by the terminator */

	/*
	 * refuse fields that cannot hold the terminator, storing it would
	 * write in front of str
	 */
	tlen = ((term == 1) || (term == 3)) ? 1 : 2;
	if (len < tlen)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	pos = len;
	switch(term) {
	case 3:
		str[--pos] = '\0';
		break;
	case 2:
		str[--pos] = ' ';
		str[--pos] = '\0';
		break;
	case 1:
		str[--pos] = ' ';
		break;
	case 0:
	default:
		str[--pos] = '\0';
		str[--pos] = ' ';
		break;
	}

	/*
	 * convert from the least significant digit backwards while there is
	 * space. An index is used (not a pointer) so we never step to an
	 * address in front of str.
	 */
	while (pos > 0) {
		str[--pos] = '0' + (char)(val & 0x7);
		if ((val = val >> 3) == 0)
			break;
	}

	/*
	 * '0' pad whatever is left at the front of the field
	 */
	while (pos > 0)
		str[--pos] = '0';

	/*
	 * digits left over means the value did not fit
	 */
	if (val != (u_quad_t)0)
		return(-1);
	return(0);
}
#endif
300 
301 /*
302  * tar_chksm()
303  *	calculate the checksum for a tar block counting the checksum field as
304  *	all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
305  *	NOTE: we use len to short circuit summing 0's on write since we ALWAYS
306  *	pad headers with 0.
307  * Return:
308  *	unsigned long checksum
309  */
310 
311 #if __STDC__
312 static u_long
313 tar_chksm(char *blk, int len)
314 #else
315 static u_long
316 tar_chksm(blk, len)
317 	char *blk;
318 	int len;
319 #endif
320 {
321 	char *stop;
322 	char *pt;
323 	u_long chksm = BLNKSUM;	/* initial value is checksum field sum */
324 
325 	/*
326 	 * add the part of the block before the checksum field
327 	 */
328 	pt = blk;
329 	stop = blk + CHK_OFFSET;
330 	while (pt < stop)
331 		chksm += (u_long)(*pt++ & 0xff);
332 	/*
333 	 * move past the checksum field and keep going, spec counts the
334 	 * checksum field as the sum of 8 blanks (which is pre-computed as
335 	 * BLNKSUM).
336 	 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
337 	 * starts, no point in summing zero's)
338 	 */
339 	pt += CHK_LEN;
340 	stop = blk + len;
341 	while (pt < stop)
342 		chksm += (u_long)(*pt++ & 0xff);
343 	return(chksm);
344 }
345 
346 /*
347  * Routines for old BSD style tar (also made portable to sysV tar)
348  */
349 
350 /*
351  * tar_id()
352  *	determine if a block given to us is a valid tar header (and not a USTAR
353  *	header). We have to be on the lookout for those pesky blocks of	all
354  *	zero's.
355  * Return:
356  *	0 if a tar header, -1 otherwise
357  */
358 
359 #if __STDC__
360 int
361 tar_id(char *blk, int size)
362 #else
363 int
364 tar_id(blk, size)
365 	char *blk;
366 	int size;
367 #endif
368 {
369 	HD_TAR *hd;
370 	HD_USTAR *uhd;
371 
372 	if (size < BLKMULT)
373 		return(-1);
374 	hd = (HD_TAR *)blk;
375 	uhd = (HD_USTAR *)blk;
376 
377 	/*
378 	 * check for block of zero's first, a simple and fast test, then make
379 	 * sure this is not a ustar header by looking for the ustar magic
380 	 * cookie. We should use TMAGLEN, but some USTAR archive programs are
381 	 * wrong and create archives missing the \0. Last we check the
382 	 * checksum. If this is ok we have to assume it is a valid header.
383 	 */
384 	if (hd->name[0] == '\0')
385 		return(-1);
386 	if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
387 		return(-1);
388 	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
389 		return(-1);
390 	return(0);
391 }
392 
393 /*
394  * tar_opt()
395  *	handle tar format specific -o options
396  * Return:
397  *	0 if ok -1 otherwise
398  */
399 
400 #if __STDC__
401 int
402 tar_opt(void)
403 #else
404 int
405 tar_opt()
406 #endif
407 {
408 	OPLIST *opt;
409 
410 	while ((opt = opt_next()) != NULL) {
411 		if (strcmp(opt->name, TAR_OPTION) ||
412 		    strcmp(opt->value, TAR_NODIR)) {
413 			tty_warn(1,
414 			    "Unknown tar format -o option/value pair %s=%s",
415 			    opt->name, opt->value);
416 			tty_warn(1,
417 			    "%s=%s is the only supported tar format option",
418 			    TAR_OPTION, TAR_NODIR);
419 			return(-1);
420 		}
421 
422 		/*
423 		 * we only support one option, and only when writing
424 		 */
425 		if ((act != APPND) && (act != ARCHIVE)) {
426 			tty_warn(1, "%s=%s is only supported when writing.",
427 			    opt->name, opt->value);
428 			return(-1);
429 		}
430 		tar_nodir = 1;
431 	}
432 	return(0);
433 }
434 
435 
436 /*
437  * tar_rd()
438  *	extract the values out of block already determined to be a tar header.
439  *	store the values in the ARCHD parameter.
440  * Return:
441  *	0
442  */
443 
444 #if __STDC__
445 int
446 tar_rd(ARCHD *arcn, char *buf)
447 #else
448 int
449 tar_rd(arcn, buf)
450 	ARCHD *arcn;
451 	char *buf;
452 #endif
453 {
454 	HD_TAR *hd;
455 	char *pt;
456 
457 	/*
458 	 * we only get proper sized buffers passed to us
459 	 */
460 	if (tar_id(buf, BLKMULT) < 0)
461 		return(-1);
462 	arcn->org_name = arcn->name;
463 	arcn->sb.st_nlink = 1;
464 	arcn->pat = NULL;
465 
466 	/*
467 	 * copy out the name and values in the stat buffer
468 	 */
469 	hd = (HD_TAR *)buf;
470 	if (gnu_hack_string) {
471 		int len = MAX(strlen(gnu_hack_string), PAXPATHLEN);
472 		arcn->nlen = l_strncpy(arcn->name, gnu_hack_string, len);
473 		arcn->name[len] = '\0';
474 		free(gnu_hack_string);
475 		gnu_hack_string = NULL;
476 	} else {
477 		arcn->nlen = l_strncpy(arcn->name, hd->name, sizeof(hd->name));
478 		arcn->name[arcn->nlen] = '\0';
479 	}
480 	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
481 	    0xfff);
482 	arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
483 	arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
484 #	ifdef NET2_STAT
485 	arcn->sb.st_size = (off_t)asc_ul(hd->size, sizeof(hd->size), OCT);
486 #	else
487 	arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT);
488 #	endif
489 	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
490 	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
491 
492 	/*
493 	 * have to look at the last character, it may be a '/' and that is used
494 	 * to encode this as a directory
495 	 */
496 	pt = &(arcn->name[arcn->nlen - 1]);
497 	arcn->pad = 0;
498 	arcn->skip = 0;
499 	switch(hd->linkflag) {
500 	case SYMTYPE:
501 		/*
502 		 * symbolic link, need to get the link name and set the type in
503 		 * the st_mode so -v printing will look correct.
504 		 */
505 		arcn->type = PAX_SLK;
506 		arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname,
507 			sizeof(hd->linkname));
508 		arcn->ln_name[arcn->ln_nlen] = '\0';
509 		arcn->sb.st_mode |= S_IFLNK;
510 		break;
511 	case LNKTYPE:
512 		/*
513 		 * hard link, need to get the link name, set the type in the
514 		 * st_mode and st_nlink so -v printing will look better.
515 		 */
516 		arcn->type = PAX_HLK;
517 		arcn->sb.st_nlink = 2;
518 		arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname,
519 			sizeof(hd->linkname));
520 		arcn->ln_name[arcn->ln_nlen] = '\0';
521 
522 		/*
523 		 * no idea of what type this thing really points at, but
524 		 * we set something for printing only.
525 		 */
526 		arcn->sb.st_mode |= S_IFREG;
527 		break;
528 	case LONGLINKTYPE:
529 		arcn->type = PAX_GLL;
530 		/* FALLTHROUGH */
531 	case LONGNAMETYPE:
532 		/*
533 		 * GNU long link/file; we tag these here and let the
534 		 * pax internals deal with it -- too ugly otherwise.
535 		 */
536 		if (hd->linkflag != LONGLINKTYPE)
537 			arcn->type = PAX_GLF;
538 		arcn->pad = TAR_PAD(arcn->sb.st_size);
539 		arcn->skip = arcn->sb.st_size;
540 		arcn->ln_name[0] = '\0';
541 		arcn->ln_nlen = 0;
542 		break;
543 	case AREGTYPE:
544 	case REGTYPE:
545 	case DIRTYPE:	/* see below */
546 	default:
547 		/*
548 		 * If we have a trailing / this is a directory and NOT a file.
549 		 * Note: V7 tar doesn't actually have DIRTYPE, but it was
550 		 * reported that V7 archives using USTAR directories do exist.
551 		 */
552 		arcn->ln_name[0] = '\0';
553 		arcn->ln_nlen = 0;
554 		if (*pt == '/' || hd->linkflag == DIRTYPE) {
555 			/*
556 			 * it is a directory, set the mode for -v printing
557 			 */
558 			arcn->type = PAX_DIR;
559 			arcn->sb.st_mode |= S_IFDIR;
560 			arcn->sb.st_nlink = 2;
561 		} else {
562 			/*
563 			 * have a file that will be followed by data. Set the
564 			 * skip value to the size field and calculate the size
565 			 * of the padding.
566 			 */
567 			arcn->type = PAX_REG;
568 			arcn->sb.st_mode |= S_IFREG;
569 			arcn->pad = TAR_PAD(arcn->sb.st_size);
570 			arcn->skip = arcn->sb.st_size;
571 		}
572 		break;
573 	}
574 
575 	/*
576 	 * strip off any trailing slash.
577 	 */
578 	if (*pt == '/') {
579 		*pt = '\0';
580 		--arcn->nlen;
581 	}
582 	return(0);
583 }
584 
585 /*
586  * tar_wr()
587  *	write a tar header for the file specified in the ARCHD to the archive.
588  *	Have to check for file types that cannot be stored and file names that
589  *	are too long. Be careful of the term (last arg) to ul_oct, each field
590  *	of tar has it own spec for the termination character(s).
591  *	ASSUMED: space after header in header block is zero filled
592  * Return:
593  *	0 if file has data to be written after the header, 1 if file has NO
594  *	data to write after the header, -1 if archive write failed
595  */
596 
597 #if __STDC__
598 int
599 tar_wr(ARCHD *arcn)
600 #else
601 int
602 tar_wr(arcn)
603 	ARCHD *arcn;
604 #endif
605 {
606 	HD_TAR *hd;
607 	int len;
608 	char hdblk[sizeof(HD_TAR)];
609 
610 	/*
611 	 * check for those file system types which tar cannot store
612 	 */
613 	switch(arcn->type) {
614 	case PAX_DIR:
615 		/*
616 		 * user asked that dirs not be written to the archive
617 		 */
618 		if (tar_nodir)
619 			return(1);
620 		break;
621 	case PAX_CHR:
622 		tty_warn(1, "Tar cannot archive a character device %s",
623 		    arcn->org_name);
624 		return(1);
625 	case PAX_BLK:
626 		tty_warn(1,
627 		    "Tar cannot archive a block device %s", arcn->org_name);
628 		return(1);
629 	case PAX_SCK:
630 		tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name);
631 		return(1);
632 	case PAX_FIF:
633 		tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name);
634 		return(1);
635 	case PAX_SLK:
636 	case PAX_HLK:
637 	case PAX_HRG:
638 		if (arcn->ln_nlen > sizeof(hd->linkname)) {
639 			tty_warn(1,"Link name too long for tar %s",
640 			    arcn->ln_name);
641 			return(1);
642 		}
643 		break;
644 	case PAX_REG:
645 	case PAX_CTG:
646 	default:
647 		break;
648 	}
649 
650 	/*
651 	 * check file name len, remember extra char for dirs (the / at the end)
652 	 */
653 	len = arcn->nlen;
654 	if (arcn->type == PAX_DIR)
655 		++len;
656 	if (len > sizeof(hd->name)) {
657 		tty_warn(1, "File name too long for tar %s", arcn->name);
658 		return(1);
659 	}
660 
661 	/*
662 	 * copy the data out of the ARCHD into the tar header based on the type
663 	 * of the file. Remember many tar readers want the unused fields to be
664 	 * padded with zero. We set the linkflag field (type), the linkname
665 	 * (or zero if not used),the size, and set the padding (if any) to be
666 	 * added after the file data (0 for all other types, as they only have
667 	 * a header)
668 	 */
669 	hd = (HD_TAR *)hdblk;
670 	zf_strncpy(hd->name, arcn->name, sizeof(hd->name));
671 	arcn->pad = 0;
672 
673 	if (arcn->type == PAX_DIR) {
674 		/*
675 		 * directories are the same as files, except have a filename
676 		 * that ends with a /, we add the slash here. No data follows,
677 		 * dirs, so no pad.
678 		 */
679 		hd->linkflag = AREGTYPE;
680 		memset(hd->linkname, 0, sizeof(hd->linkname));
681 		hd->name[len-1] = '/';
682 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
683 			goto out;
684 	} else if (arcn->type == PAX_SLK) {
685 		/*
686 		 * no data follows this file, so no pad
687 		 */
688 		hd->linkflag = SYMTYPE;
689 		zf_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname));
690 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
691 			goto out;
692 	} else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
693 		/*
694 		 * no data follows this file, so no pad
695 		 */
696 		hd->linkflag = LNKTYPE;
697 		zf_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname));
698 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
699 			goto out;
700 	} else {
701 		/*
702 		 * data follows this file, so set the pad
703 		 */
704 		hd->linkflag = AREGTYPE;
705 		memset(hd->linkname, 0, sizeof(hd->linkname));
706 #		ifdef NET2_STAT
707 		if (ul_oct((u_long)arcn->sb.st_size, hd->size,
708 		    sizeof(hd->size), 1)) {
709 #		else
710 		if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size,
711 		    sizeof(hd->size), 1)) {
712 #		endif
713 			tty_warn(1,"File is too large for tar %s",
714 			    arcn->org_name);
715 			return(1);
716 		}
717 		arcn->pad = TAR_PAD(arcn->sb.st_size);
718 	}
719 
720 	/*
721 	 * copy those fields that are independent of the type
722 	 */
723 	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
724 	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
725 	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
726 	    ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
727 		goto out;
728 
729 	/*
730 	 * calculate and add the checksum, then write the header. A return of
731 	 * 0 tells the caller to now write the file data, 1 says no data needs
732 	 * to be written
733 	 */
734 	if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
735 	    sizeof(hd->chksum), 2))
736 		goto out;
737 	if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0)
738 		return(-1);
739 	if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
740 		return(-1);
741 	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
742 		return(0);
743 	return(1);
744 
745     out:
746 	/*
747 	 * header field is out of range
748 	 */
749 	tty_warn(1, "Tar header field is too small for %s", arcn->org_name);
750 	return(1);
751 }
752 
753 /*
754  * Routines for POSIX ustar
755  */
756 
/*
 * ustar_strd()
 *	set up for reading a ustar archive; there is nothing to prepare, so
 *	this always succeeds
 * Return:
 *	0 if ok, -1 otherwise
 */

#if __STDC__
int
ustar_strd(void)
#else
int
ustar_strd()
#endif
{
	int status = 0;

	return(status);
}
774 
/*
 * ustar_stwr()
 *	set up for writing a ustar archive; there is nothing to prepare, so
 *	this always succeeds
 * Return:
 *	0 if ok, -1 otherwise
 */

#if __STDC__
int
ustar_stwr(void)
#else
int
ustar_stwr()
#endif
{
	int status = 0;

	return(status);
}
792 
793 /*
794  * ustar_id()
795  *	determine if a block given to us is a valid ustar header. We have to
796  *	be on the lookout for those pesky blocks of all zero's
797  * Return:
798  *	0 if a ustar header, -1 otherwise
799  */
800 
801 #if __STDC__
802 int
803 ustar_id(char *blk, int size)
804 #else
805 int
806 ustar_id(blk, size)
807 	char *blk;
808 	int size;
809 #endif
810 {
811 	HD_USTAR *hd;
812 
813 	if (size < BLKMULT)
814 		return(-1);
815 	hd = (HD_USTAR *)blk;
816 
817 	/*
818 	 * check for block of zero's first, a simple and fast test then check
819 	 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
820 	 * programs are fouled up and create archives missing the \0. Last we
821 	 * check the checksum. If ok we have to assume it is a valid header.
822 	 */
823 	if (hd->name[0] == '\0')
824 		return(-1);
825 	if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
826 		return(-1);
827 	if (!strncmp(hd->magic, "ustar  ", 8))
828 		is_oldgnutar = 1;
829 	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
830 		return(-1);
831 	return(0);
832 }
833 
834 /*
835  * ustar_rd()
836  *	extract the values out of block already determined to be a ustar header.
837  *	store the values in the ARCHD parameter.
838  * Return:
839  *	0
840  */
841 
842 #if __STDC__
843 int
844 ustar_rd(ARCHD *arcn, char *buf)
845 #else
846 int
847 ustar_rd(arcn, buf)
848 	ARCHD *arcn;
849 	char *buf;
850 #endif
851 {
852 	HD_USTAR *hd;
853 	char *dest;
854 	int cnt;
855 	dev_t devmajor;
856 	dev_t devminor;
857 
858 	/*
859 	 * we only get proper sized buffers
860 	 */
861 	if (ustar_id(buf, BLKMULT) < 0)
862 		return(-1);
863 	arcn->org_name = arcn->name;
864 	arcn->sb.st_nlink = 1;
865 	arcn->pat = NULL;
866 	hd = (HD_USTAR *)buf;
867 
868 	/*
869 	 * see if the filename is split into two parts. if, so joint the parts.
870 	 * we copy the prefix first and add a / between the prefix and name.
871 	 */
872 	dest = arcn->name;
873 	if (*(hd->prefix) != '\0') {
874 		cnt = l_strncpy(arcn->name, hd->prefix, sizeof(hd->prefix));
875 		dest += cnt;
876 		*dest++ = '/';
877 	}
878 	cnt = l_strncpy(dest, hd->name, sizeof(hd->name));
879 	dest += cnt;
880 	*dest = '\0';
881 	arcn->nlen = dest - arcn->name;
882 
883 	/*
884 	 * follow the spec to the letter. we should only have mode bits, strip
885 	 * off all other crud we may be passed.
886 	 */
887 	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
888 	    0xfff);
889 #	ifdef NET2_STAT
890 	arcn->sb.st_size = (off_t)asc_ul(hd->size, sizeof(hd->size), OCT);
891 #	else
892 	arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT);
893 #	endif
894 	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
895 	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
896 
897 	/*
898 	 * If we can find the ascii names for gname and uname in the password
899 	 * and group files we will use the uid's and gid they bind. Otherwise
900 	 * we use the uid and gid values stored in the header. (This is what
901 	 * the posix spec wants).
902 	 */
903 	hd->gname[sizeof(hd->gname) - 1] = '\0';
904 	if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0)
905 		arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
906 	hd->uname[sizeof(hd->uname) - 1] = '\0';
907 	if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0)
908 		arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
909 
910 	/*
911 	 * set the defaults, these may be changed depending on the file type
912 	 */
913 	arcn->ln_name[0] = '\0';
914 	arcn->ln_nlen = 0;
915 	arcn->pad = 0;
916 	arcn->skip = 0;
917 	arcn->sb.st_rdev = (dev_t)0;
918 
919 	/*
920 	 * set the mode and PAX type according to the typeflag in the header
921 	 */
922 	switch(hd->typeflag) {
923 	case FIFOTYPE:
924 		arcn->type = PAX_FIF;
925 		arcn->sb.st_mode |= S_IFIFO;
926 		break;
927 	case DIRTYPE:
928 		arcn->type = PAX_DIR;
929 		arcn->sb.st_mode |= S_IFDIR;
930 		arcn->sb.st_nlink = 2;
931 
932 		/*
933 		 * Some programs that create ustar archives append a '/'
934 		 * to the pathname for directories. This clearly violates
935 		 * ustar specs, but we will silently strip it off anyway.
936 		 */
937 		if (arcn->name[arcn->nlen - 1] == '/')
938 			arcn->name[--arcn->nlen] = '\0';
939 		break;
940 	case BLKTYPE:
941 	case CHRTYPE:
942 		/*
943 		 * this type requires the rdev field to be set.
944 		 */
945 		if (hd->typeflag == BLKTYPE) {
946 			arcn->type = PAX_BLK;
947 			arcn->sb.st_mode |= S_IFBLK;
948 		} else {
949 			arcn->type = PAX_CHR;
950 			arcn->sb.st_mode |= S_IFCHR;
951 		}
952 		devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
953 		devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
954 		arcn->sb.st_rdev = TODEV(devmajor, devminor);
955 		break;
956 	case SYMTYPE:
957 	case LNKTYPE:
958 		if (hd->typeflag == SYMTYPE) {
959 			arcn->type = PAX_SLK;
960 			arcn->sb.st_mode |= S_IFLNK;
961 		} else {
962 			arcn->type = PAX_HLK;
963 			/*
964 			 * so printing looks better
965 			 */
966 			arcn->sb.st_mode |= S_IFREG;
967 			arcn->sb.st_nlink = 2;
968 		}
969 		/*
970 		 * copy the link name
971 		 */
972 		arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname,
973 			sizeof(hd->linkname));
974 		arcn->ln_name[arcn->ln_nlen] = '\0';
975 		break;
976 	case CONTTYPE:
977 	case AREGTYPE:
978 	case REGTYPE:
979 	default:
980 		/*
981 		 * these types have file data that follows. Set the skip and
982 		 * pad fields.
983 		 */
984 		arcn->type = PAX_REG;
985 		arcn->pad = TAR_PAD(arcn->sb.st_size);
986 		arcn->skip = arcn->sb.st_size;
987 		arcn->sb.st_mode |= S_IFREG;
988 		break;
989 	}
990 	return(0);
991 }
992 
993 /*
994  * ustar_wr()
995  *	write a ustar header for the file specified in the ARCHD to the archive
996  *	Have to check for file types that cannot be stored and file names that
997  *	are too long. Be careful of the term (last arg) to ul_oct, we only use
998  *	'\0' for the termination character (this is different than picky tar)
999  *	ASSUMED: space after header in header block is zero filled
1000  * Return:
1001  *	0 if file has data to be written after the header, 1 if file has NO
1002  *	data to write after the header, -1 if archive write failed
1003  */
1004 
1005 #if __STDC__
1006 int
1007 ustar_wr(ARCHD *arcn)
1008 #else
1009 int
1010 ustar_wr(arcn)
1011 	ARCHD *arcn;
1012 #endif
1013 {
1014 	HD_USTAR *hd;
1015 	char *pt;
1016 	char hdblk[sizeof(HD_USTAR)];
1017 	const char *user, *group;
1018 
1019 	/*
1020 	 * check for those file system types ustar cannot store
1021 	 */
1022 	if (arcn->type == PAX_SCK) {
1023 		tty_warn(1, "Ustar cannot archive a socket %s", arcn->org_name);
1024 		return(1);
1025 	}
1026 
1027 	/*
1028 	 * check the length of the linkname
1029 	 */
1030 	if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
1031 	    (arcn->type == PAX_HRG)) && (arcn->ln_nlen > sizeof(hd->linkname))){
1032 		tty_warn(1, "Link name too long for ustar %s", arcn->ln_name);
1033 		return(1);
1034 	}
1035 
1036 	/*
1037 	 * split the path name into prefix and name fields (if needed). if
1038 	 * pt != arcn->name, the name has to be split
1039 	 */
1040 	if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
1041 		tty_warn(1, "File name too long for ustar %s", arcn->name);
1042 		return(1);
1043 	}
1044 	hd = (HD_USTAR *)hdblk;
1045 	arcn->pad = 0L;
1046 
1047 	/*
1048 	 * split the name, or zero out the prefix
1049 	 */
1050 	if (pt != arcn->name) {
1051 		/*
1052 		 * name was split, pt points at the / where the split is to
1053 		 * occur, we remove the / and copy the first part to the prefix
1054 		 */
1055 		*pt = '\0';
1056 		zf_strncpy(hd->prefix, arcn->name, sizeof(hd->prefix));
1057 		*pt++ = '/';
1058 	} else
1059 		memset(hd->prefix, 0, sizeof(hd->prefix));
1060 
1061 	/*
1062 	 * copy the name part. this may be the whole path or the part after
1063 	 * the prefix
1064 	 */
1065 	zf_strncpy(hd->name, pt, sizeof(hd->name));
1066 
1067 	/*
1068 	 * set the fields in the header that are type dependent
1069 	 */
1070 	switch(arcn->type) {
1071 	case PAX_DIR:
1072 		hd->typeflag = DIRTYPE;
1073 		memset(hd->linkname, 0, sizeof(hd->linkname));
1074 		memset(hd->devmajor, 0, sizeof(hd->devmajor));
1075 		memset(hd->devminor, 0, sizeof(hd->devminor));
1076 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1077 			goto out;
1078 		break;
1079 	case PAX_CHR:
1080 	case PAX_BLK:
1081 		if (arcn->type == PAX_CHR)
1082 			hd->typeflag = CHRTYPE;
1083 		else
1084 			hd->typeflag = BLKTYPE;
1085 		memset(hd->linkname, 0, sizeof(hd->linkname));
1086 		if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
1087 		   sizeof(hd->devmajor), 3) ||
1088 		   ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
1089 		   sizeof(hd->devminor), 3) ||
1090 		   ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1091 			goto out;
1092 		break;
1093 	case PAX_FIF:
1094 		hd->typeflag = FIFOTYPE;
1095 		memset(hd->linkname, 0, sizeof(hd->linkname));
1096 		memset(hd->devmajor, 0, sizeof(hd->devmajor));
1097 		memset(hd->devminor, 0, sizeof(hd->devminor));
1098 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1099 			goto out;
1100 		break;
1101 	case PAX_SLK:
1102 	case PAX_HLK:
1103 	case PAX_HRG:
1104 		if (arcn->type == PAX_SLK)
1105 			hd->typeflag = SYMTYPE;
1106 		else
1107 			hd->typeflag = LNKTYPE;
1108 		zf_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname));
1109 		memset(hd->devmajor, 0, sizeof(hd->devmajor));
1110 		memset(hd->devminor, 0, sizeof(hd->devminor));
1111 		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1112 			goto out;
1113 		break;
1114 	case PAX_REG:
1115 	case PAX_CTG:
1116 	default:
1117 		/*
1118 		 * file data with this type, set the padding
1119 		 */
1120 		if (arcn->type == PAX_CTG)
1121 			hd->typeflag = CONTTYPE;
1122 		else
1123 			hd->typeflag = REGTYPE;
1124 		memset(hd->linkname, 0, sizeof(hd->linkname));
1125 		memset(hd->devmajor, 0, sizeof(hd->devmajor));
1126 		memset(hd->devminor, 0, sizeof(hd->devminor));
1127 		arcn->pad = TAR_PAD(arcn->sb.st_size);
1128 #		ifdef NET2_STAT
1129 		if (ul_oct((u_long)arcn->sb.st_size, hd->size,
1130 		    sizeof(hd->size), 3)) {
1131 #		else
1132 		if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size,
1133 		    sizeof(hd->size), 3)) {
1134 #		endif
1135 			tty_warn(1,"File is too long for ustar %s",
1136 			    arcn->org_name);
1137 			return(1);
1138 		}
1139 		break;
1140 	}
1141 
1142 	zf_strncpy(hd->magic, TMAGIC, TMAGLEN);
1143 	zf_strncpy(hd->version, TVERSION, TVERSLEN);
1144 
1145 	/*
1146 	 * set the remaining fields. Some versions want all 16 bits of mode
1147 	 * we better humor them (they really do not meet spec though)....
1148 	 */
1149 	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) ||
1150 	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)  ||
1151 	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) ||
1152 	    ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3))
1153 		goto out;
1154 	user = user_from_uid(arcn->sb.st_uid, 1);
1155 	group = group_from_gid(arcn->sb.st_gid, 1);
1156 	zf_strncpy(hd->uname, user ? user : "", sizeof(hd->uname));
1157 	zf_strncpy(hd->gname, group ? group : "", sizeof(hd->gname));
1158 
1159 	/*
1160 	 * calculate and store the checksum write the header to the archive
1161 	 * return 0 tells the caller to now write the file data, 1 says no data
1162 	 * needs to be written
1163 	 */
1164 	if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1165 	   sizeof(hd->chksum), 3))
1166 		goto out;
1167 	if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
1168 		return(-1);
1169 	if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1170 		return(-1);
1171 	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1172 		return(0);
1173 	return(1);
1174 
1175     out:
1176 	/*
1177 	 * header field is out of range
1178 	 */
1179 	tty_warn(1, "Ustar header field is too small for %s", arcn->org_name);
1180 	return(1);
1181 }
1182 
1183 /*
1184  * name_split()
1185  *	see if the name has to be split for storage in a ustar header. We try
1186  *	to fit the entire name in the name field without splitting if we can.
1187  *	The split point is always at a /
1188  * Return
1189  *	character pointer to split point (always the / that is to be removed
1190  *	if the split is not needed, the points is set to the start of the file
1191  *	name (it would violate the spec to split there). A NULL is returned if
1192  *	the file name is too long
1193  */
1194 
1195 #if __STDC__
1196 static char *
1197 name_split(char *name, int len)
1198 #else
1199 static char *
1200 name_split(name, len)
1201 	char *name;
1202 	int len;
1203 #endif
1204 {
1205 	char *start;
1206 
1207 	/*
1208 	 * check to see if the file name is small enough to fit in the name
1209 	 * field. if so just return a pointer to the name.
1210 	 */
1211 	if (len <= TNMSZ)
1212 		return(name);
1213 	if (len > (TPFSZ + TNMSZ + 1))
1214 		return(NULL);
1215 
1216 	/*
1217 	 * we start looking at the biggest sized piece that fits in the name
1218 	 * field. We walk forward looking for a slash to split at. The idea is
1219 	 * to find the biggest piece to fit in the name field (or the smallest
1220 	 * prefix we can find) (the -1 is correct the biggest piece would
1221 	 * include the slash between the two parts that gets thrown away)
1222 	 */
1223 	start = name + len - TNMSZ - 1;
1224 	while ((*start != '\0') && (*start != '/'))
1225 		++start;
1226 
1227 	/*
1228 	 * if we hit the end of the string, this name cannot be split, so we
1229 	 * cannot store this file.
1230 	 */
1231 	if (*start == '\0')
1232 		return(NULL);
1233 	len = start - name;
1234 
1235 	/*
1236 	 * NOTE: /str where the length of str == TNMSZ can not be stored under
1237 	 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
1238 	 * the file would then expand on extract to //str. The len == 0 below
1239 	 * makes this special case follow the spec to the letter.
1240 	 */
1241 	if ((len > TPFSZ) || (len == 0))
1242 		return(NULL);
1243 
1244 	/*
1245 	 * ok have a split point, return it to the caller
1246 	 */
1247 	return(start);
1248 }
1249 
/*
 * deal with GNU tar -X switch.  basically, we go through each line of
 * the file, converting the "glob" on that line into an RE line of the
 * form `/^RE$//', which we pass to rep_add(), which will add an empty
 * replacement (exclusion) for the named files.
 *
 * Lines longer than MAXPATHLEN are warned about and skipped; their
 * converted form would not fit in the conversion buffer.
 *
 * Return
 *	0 when the whole file has been processed, -1 if the file can not
 *	be opened or rep_add() fails.
 */
int
tar_gnutar_X_compat(path)
	const char *path;
{
	/*
	 * worst case every input character is emitted as two characters,
	 * plus "/^" in front, "$//" behind and the terminating NUL.
	 */
	char *line, sbuf[MAXPATHLEN * 2 + 1 + 5];
	FILE *fp;
	int lineno = 0, rval = 0;
	size_t len, i, j;
	unsigned char ch;

	fp = fopen(path, "r");
	if (fp == NULL) {
		tty_warn(1, "can not open %s: %s", path,
		    strerror(errno));
		return(-1);
	}

	while ((line = fgetln(fp, &len)) != NULL) {
		lineno++;
		if (len > MAXPATHLEN) {
			/*
			 * must not fall through: converting this line would
			 * write past the end of sbuf.
			 */
			tty_warn(0, "pathname too long, line %d of %s",
			    lineno, path);
			continue;
		}
		if (len > 0 && line[len - 1] == '\n')
			len--;

		sbuf[0] = '/';
		sbuf[1] = '^';
		j = 2;
		for (i = 0; i < len; i++) {
			/*
			 * convert glob to regexp, escaping everything.
			 * the ctype functions need an unsigned char value.
			 */
			ch = (unsigned char)line[i];
			if (ch == '?') {
				/* glob '?' is regexp '.' */
				sbuf[j++] = '.';
				continue;
			}
			if (ch == '*')
				sbuf[j++] = '.';	/* glob '*' is ".*" */
			else if (!isalnum(ch) && !isblank(ch))
				sbuf[j++] = '\\';
			sbuf[j++] = line[i];
		}
		sbuf[j++] = '$';
		sbuf[j++] = '/';
		sbuf[j++] = '/';
		sbuf[j] = '\0';

		if (rep_add(sbuf) < 0) {
			rval = -1;
			break;
		}
	}

	/* release the stream on every path, success or failure */
	(void)fclose(fp);
	return(rval);
}
1301