xref: /netbsd-src/bin/pax/ar_subs.c (revision 2b9c0f9f43ba74451b873b59bedeb267f2fc4a30)
1 /*	$NetBSD: ar_subs.c,v 1.59 2024/08/05 13:37:26 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992 Keith Muller.
5  * Copyright (c) 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Keith Muller of the University of California, San Diego.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #if HAVE_NBTOOL_CONFIG_H
37 #include "nbtool_config.h"
38 #endif
39 
40 #include <sys/cdefs.h>
41 #if !defined(lint)
42 #if 0
43 static char sccsid[] = "@(#)ar_subs.c	8.2 (Berkeley) 4/18/94";
44 #else
45 __RCSID("$NetBSD: ar_subs.c,v 1.59 2024/08/05 13:37:26 riastradh Exp $");
46 #endif
47 #endif /* not lint */
48 
49 #include <sys/types.h>
50 #include <sys/time.h>
51 #include <sys/stat.h>
52 #include <sys/param.h>
53 #include <signal.h>
54 #include <string.h>
55 #include <stdio.h>
56 #include <ctype.h>
57 #include <fcntl.h>
58 #include <errno.h>
59 #include <time.h>
60 #include <unistd.h>
61 #include <stdlib.h>
62 #include "pax.h"
63 #include "pat_rep.h"
64 #include "extern.h"
65 
66 static int path_check(ARCHD *, int);	/* reject names resolving outside the cached cwd */
67 static int wr_archive(ARCHD *, int is_app);	/* write loop shared by archive() and append() */
68 static int get_arc(void);	/* called first by list/extract/append to figure out the archive type */
69 static int next_head(ARCHD *);	/* try to find the next valid header in the archive */
70 #if !HAVE_NBTOOL_CONFIG_H
71 static int fdochroot(int);	/* fchroot() and refresh the cached cwd */
72 #endif
73 extern sigset_t s_mask;	/* signal set blocked before the final cleanup code runs */
74 
75 /*
76  * Routines which control the overall operation modes of pax as specified by
77  * the user: list, append, read ...
78  */
79 
80 static char hdbuf[BLKMULT];		/* space for archive header on read */
81 u_long flcnt;				/* number of files processed (bumped by wr_archive() and copy()) */
82 ARCHD archd;				/* the single archive member descriptor the operations below work on */
83 
84 static char	cwdpath[MAXPATHLEN];	/* cached getcwd() result, refreshed by updatepath() */
85 static size_t	cwdpathlen;		/* strlen(cwdpath); prefix length used by path_check() */
86 
87 int
88 updatepath(void)
89 {
90 	if (getcwd(cwdpath, sizeof(cwdpath)) == NULL) {
91 		syswarn(1, errno, "Cannot get working directory");
92 		return -1;
93 	}
94 	cwdpathlen = strlen(cwdpath);
95 	return 0;
96 }
97 
/*
 * fdochdir()
 *	fchdir() to the directory open on descriptor fcwd and, when that
 *	works, refresh the cached working directory. Returns the result of
 *	updatepath() on success, or -1 (after a warning) if fchdir() fails.
 */
int
fdochdir(int fcwd)
{
	if (fchdir(fcwd) != -1)
		return updatepath();

	syswarn(1, errno, "Cannot chdir to `.'");
	return -1;
}
107 
/*
 * dochdir()
 *	chdir() to name and refresh the cached working directory.
 *	Returns 0 on success. If chdir() fails a warning is printed and -1
 *	is returned, matching fdochdir() and fdochroot(); the working
 *	directory has not changed in that case, so the cached path is still
 *	valid and is left alone.
 */
int
dochdir(const char *name)
{
	if (chdir(name) == -1) {
		syswarn(1, errno, "Cannot chdir to `%s'", name);
		return -1;
	}
	return updatepath();
}
115 
#if !HAVE_NBTOOL_CONFIG_H
/*
 * fdochroot()
 *	fchroot() to the directory open on descriptor fcwd, then refresh the
 *	cached working directory. Returns the result of updatepath() on
 *	success, or -1 (after a warning) if fchroot() fails.
 */
static int
fdochroot(int fcwd)
{
	if (fchroot(fcwd) == 0)
		return updatepath();

	syswarn(1, errno, "Can't fchroot to \".\"");
	return -1;
}
#endif
127 
/*
 * domkdir()
 *	mkdir() wrapper that tolerates the directory already being there.
 *	Returns whatever mkdir() returned when it succeeds. On failure:
 *	  - EISDIR is treated as success;
 *	  - EEXIST, EACCES, ENOSYS (Solaris) and EROFS are treated as success
 *	    only if fname can be stat()ed and is a directory;
 *	  - anything else fails.
 *	When -1 is returned errno holds the error reported by mkdir().
 */
int
domkdir(const char *fname, mode_t mode)
{
	struct stat st;
	int rv;
	int mkerr;

	rv = mkdir(fname, mode);
	if (rv != -1)
		return rv;

	mkerr = errno;
	if (mkerr == EISDIR)
		return 0;

	if (mkerr != EEXIST && mkerr != EACCES && mkerr != ENOSYS &&
	    mkerr != EROFS)
		return -1;

	/* somebody may have made it for us already */
	if (stat(fname, &st) != -1 && S_ISDIR(st.st_mode))
		return 0;

	/* stat() may have clobbered errno; report the mkdir() failure */
	errno = mkerr;
	return -1;
}
157 
158 static int
159 path_check(ARCHD *arcn, int level)
160 {
161 	char buf[MAXPATHLEN];
162 	char *p;
163 
164 	if ((p = strrchr(arcn->name, '/')) == NULL)
165 		return 0;
166 	*p = '\0';
167 
168 	if (realpath(arcn->name, buf) == NULL) {
169 		int error;
170 		error = path_check(arcn, level + 1);
171 		*p = '/';
172 		if (error == 0)
173 			return 0;
174 		if (level == 0)
175 			syswarn(1, 0, "Cannot resolve `%s'", arcn->name);
176 		return -1;
177 	}
178 	if (strncmp(buf, cwdpath, cwdpathlen) != 0) {
179 		*p = '/';
180 		syswarn(1, 0, "Attempt to write file `%s' that resolves into "
181 		    "`%s/%s' outside current working directory `%s' ignored",
182 		    arcn->name, buf, p + 1, cwdpath);
183 		return -1;
184 	}
185 	*p = '/';
186 	return 0;
187 }
188 
189 /*
190  * list()
191  *	list the contents of an archive which match user supplied pattern(s)
192  *	(if no pattern is supplied, list entire contents).
193  */
194 
195 int
196 list(void)
197 {
198 	ARCHD *arcn;
199 	int res;
200 	time_t now;
201 
202 	arcn = &archd;
203 	/*
204 	 * figure out archive type; pass any format specific options to the
205 	 * archive option processing routine; call the format init routine. We
206 	 * also save current time for ls_list() so we do not make a system
207 	 * call for each file we need to print. If verbose (vflag) start up
208 	 * the name and group caches.
209 	 */
210 	if ((get_arc() < 0) || ((*frmt->options)() < 0) ||
211 	    ((*frmt->st_rd)() < 0))
212 		return 1;
213 
214 	now = time(NULL);
215 
216 	/*
217 	 * step through the archive until the format says it is done
218 	 */
219 	while (next_head(arcn) == 0) {
220 		if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) {
221 			/*
222 			 * we need to read, to get the real filename
223 			 */
224 			off_t cnt;
225 			if (!(*frmt->rd_data)(arcn, -arcn->type, &cnt))
226 				(void)rd_skip(cnt + arcn->pad);
227 			continue;
228 		}
229 
230 		/*
231 		 * check for pattern, and user specified options match.
232 		 * When all patterns are matched we are done.
233 		 */
234 		if ((res = pat_match(arcn)) < 0)
235 			break;
236 
237 		if ((res == 0) && (sel_chk(arcn) == 0)) {
238 			/*
239 			 * pattern resulted in a selected file
240 			 */
241 			if (pat_sel(arcn) < 0)
242 				break;
243 
244 			/*
245 			 * modify the name as requested by the user if name
246 			 * survives modification, do a listing of the file
247 			 */
248 			if ((res = mod_name(arcn, RENM)) < 0)
249 				break;
250 			if (res == 0) {
251 				if (arcn->name[0] == '/' && !check_Aflag()) {
252 					memmove(arcn->name, arcn->name + 1,
253 					    strlen(arcn->name));
254 				}
255 				ls_list(arcn, now, stdout);
256 			}
257 			/*
258 			 * if there's an error writing to stdout then we must
259 			 * stop now -- we're probably writing to a pipe that
260 			 * has been closed by the reader.
261 			 */
262 			if (ferror(stdout)) {
263 				syswarn(1, errno, "Listing incomplete.");
264 				break;
265 			}
266 		}
267 		/*
268 		 * skip to next archive format header using values calculated
269 		 * by the format header read routine
270 		 */
271 		if (rd_skip(arcn->skip + arcn->pad) == 1)
272 			break;
273 	}
274 
275 	/*
276 	 * all done, let format have a chance to cleanup, and make sure that
277 	 * the patterns supplied by the user were all matched
278 	 */
279 	(void)(*frmt->end_rd)();
280 	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
281 	ar_close();
282 	pat_chk();
283 
284 	return 0;
285 }
286 
287 /*
288  * extract()
289  *	extract the member(s) of an archive as specified by user supplied
290  *	pattern(s) (no patterns extracts all members)
291  */
292 
293 int
294 extract(void)
295 {
296 	ARCHD *arcn;
297 	int res;
298 	off_t cnt;
299 	struct stat sb;
300 	int fd;
301 	time_t now;
302 
303 	arcn = &archd;
304 	/*
305 	 * figure out archive type; pass any format specific options to the
306 	 * archive option processing routine; call the format init routine;
307 	 * start up the directory modification time and access mode database
308 	 */
309 	if ((get_arc() < 0) || ((*frmt->options)() < 0) ||
310 	    ((*frmt->st_rd)() < 0) || (dir_start() < 0))
311 		return 1;
312 
313 	now = time(NULL);
314 #if !HAVE_NBTOOL_CONFIG_H
315 	if (do_chroot)
316 		(void)fdochroot(cwdfd);
317 #endif
318 
319 	/*
320 	 * When we are doing interactive rename, we store the mapping of names
321 	 * so we can fix up hard links files later in the archive.
322 	 */
323 	if (iflag && (name_start() < 0))
324 		return 1;
325 
326 	/*
327 	 * step through each entry on the archive until the format read routine
328 	 * says it is done
329 	 */
330 	while (next_head(arcn) == 0) {
331 		int write_to_hard_link = 0;
332 
333 		if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) {
334 			/*
335 			 * we need to read, to get the real filename
336 			 */
337 			if (!(*frmt->rd_data)(arcn, -arcn->type, &cnt))
338 				(void)rd_skip(cnt + arcn->pad);
339 			continue;
340 		}
341 
342 		/*
343 		 * check for pattern, and user specified options match. When
344 		 * all the patterns are matched we are done
345 		 */
346 		if ((res = pat_match(arcn)) < 0)
347 			break;
348 
349 		if ((res > 0) || (sel_chk(arcn) != 0)) {
350 			/*
351 			 * file is not selected. skip past any file
352 			 * data and padding and go back for the next
353 			 * archive member
354 			 */
355 			(void)rd_skip(arcn->skip + arcn->pad);
356 			continue;
357 		}
358 
359 		if (kflag && (lstat(arcn->name, &sb) == 0)) {
360 			(void)rd_skip(arcn->skip + arcn->pad);
361 			continue;
362 		}
363 
364 		/*
365 		 * with -u or -D only extract when the archive member is newer
366 		 * than the file with the same name in the file system (no
367 		 * test of being the same type is required).
368 		 * NOTE: this test is done BEFORE name modifications as
369 		 * specified by pax. this operation can be confusing to the
370 		 * user who might expect the test to be done on an existing
371 		 * file AFTER the name mod. In honesty the pax spec is probably
372 		 * flawed in this respect.  ignore this for GNU long links.
373 		 */
374 		if ((uflag || Dflag) && ((lstat(arcn->name, &sb) == 0))) {
375 			if (uflag && Dflag) {
376 				if ((arcn->sb.st_mtime <= sb.st_mtime) &&
377 				    (arcn->sb.st_ctime <= sb.st_ctime)) {
378 					(void)rd_skip(arcn->skip + arcn->pad);
379 					continue;
380 				}
381 			} else if (Dflag) {
382 				if (arcn->sb.st_ctime <= sb.st_ctime) {
383 					(void)rd_skip(arcn->skip + arcn->pad);
384 					continue;
385 				}
386 			} else if (arcn->sb.st_mtime <= sb.st_mtime) {
387 				(void)rd_skip(arcn->skip + arcn->pad);
388 				continue;
389 			}
390 		}
391 
392 		/*
393 		 * this archive member is now been selected. modify the name.
394 		 */
395 		if ((pat_sel(arcn) < 0) || ((res = mod_name(arcn, RENM)) < 0))
396 			break;
397 		if (res > 0) {
398 			/*
399 			 * a bad name mod, skip and purge name from link table
400 			 */
401 			purg_lnk(arcn);
402 			(void)rd_skip(arcn->skip + arcn->pad);
403 			continue;
404 		}
405 
406 		if (arcn->name[0] == '/' && !check_Aflag()) {
407 			memmove(arcn->name, arcn->name + 1, strlen(arcn->name));
408 		}
409 		/*
410 		 * Non standard -Y and -Z flag. When the existing file is
411 		 * same age or newer skip; ignore this for GNU long links.
412 		 */
413 		if ((Yflag || Zflag) && ((lstat(arcn->name, &sb) == 0))) {
414 			if (Yflag && Zflag) {
415 				if ((arcn->sb.st_mtime <= sb.st_mtime) &&
416 				    (arcn->sb.st_ctime <= sb.st_ctime)) {
417 					(void)rd_skip(arcn->skip + arcn->pad);
418 					continue;
419 				}
420 			} else if (Yflag) {
421 				if (arcn->sb.st_ctime <= sb.st_ctime) {
422 					(void)rd_skip(arcn->skip + arcn->pad);
423 					continue;
424 				}
425 			} else if (arcn->sb.st_mtime <= sb.st_mtime) {
426 				(void)rd_skip(arcn->skip + arcn->pad);
427 				continue;
428 			}
429 		}
430 
431 		if (vflag) {
432 			if (vflag > 1)
433 				ls_list(arcn, now, listf);
434 			else {
435 				(void)safe_print(arcn->name, listf);
436 				vfpart = 1;
437 			}
438 		}
439 
440 		/*
441 		 * if required, chdir around.
442 		 */
443 		if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL) &&
444 		    !to_stdout)
445 			dochdir(arcn->pat->chdname);
446 
447 		if (secure && path_check(arcn, 0) != 0) {
448 			(void)rd_skip(arcn->skip + arcn->pad);
449 			continue;
450 		}
451 
452 		/*
453 		 * all ok, extract this member based on type
454 		 */
455 		if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) {
456 			/*
457 			 * process archive members that are not regular files.
458 			 * throw out padding and any data that might follow the
459 			 * header (as determined by the format).
460 			 */
461 			if ((arcn->type == PAX_HLK) ||
462 			    (arcn->type == PAX_HRG))
463 				res = lnk_creat(arcn, &write_to_hard_link);
464 			else
465 				res = node_creat(arcn);
466 
467 			if (!write_to_hard_link) {
468 				(void)rd_skip(arcn->skip + arcn->pad);
469 				if (res < 0)
470 					purg_lnk(arcn);
471 
472 				if (vflag && vfpart) {
473 					(void)putc('\n', listf);
474 					vfpart = 0;
475 				}
476 				continue;
477 			}
478 		}
479 		if (to_stdout)
480 			fd = STDOUT_FILENO;
481 		else {
482 			/*
483 			 * We have a file with data here. If we cannot create
484 			 * it, skip over the data and purge the name from hard
485 			 * link table.
486 			 */
487 			if ((fd = file_creat(arcn, write_to_hard_link)) < 0) {
488 				(void)fflush(listf);
489 				(void)rd_skip(arcn->skip + arcn->pad);
490 				purg_lnk(arcn);
491 				continue;
492 			}
493 		}
494 		/*
495 		 * extract the file from the archive and skip over padding and
496 		 * any unprocessed data
497 		 */
498 		res = (*frmt->rd_data)(arcn, fd, &cnt);
499 		if (!to_stdout)
500 			file_close(arcn, fd);
501 		if (vflag && vfpart) {
502 			(void)putc('\n', listf);
503 			vfpart = 0;
504 		}
505 		if (!res)
506 			(void)rd_skip(cnt + arcn->pad);
507 
508 		/*
509 		 * if required, chdir around.
510 		 */
511 		if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL))
512 			fdochdir(cwdfd);
513 	}
514 
515 	/*
516 	 * all done, restore directory modes and times as required; make sure
517 	 * all patterns supplied by the user were matched; block off signals
518 	 * to avoid chance for multiple entry into the cleanup code.
519 	 */
520 	(void)(*frmt->end_rd)();
521 	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
522 	ar_close();
523 	proc_dir();
524 	pat_chk();
525 
526 	return 0;
527 }
528 
529 /*
530  * wr_archive()
531  *	Write an archive. used in both creating a new archive and appends on
532  *	previously written archive.
533  */
534 
535 static int
536 wr_archive(ARCHD *arcn, int is_app)
537 {
538 	int res;
539 	int hlk;
540 	int wr_one;
541 	off_t cnt;
542 	int (*wrf)(ARCHD *);
543 	int fd = -1;
544 	time_t now;
545 
546 	/*
547 	 * if this format supports hard link storage, start up the database
548 	 * that detects them.
549 	 */
550 	if (((hlk = frmt->hlk) == 1) && (lnk_start() < 0))
551 		return 1;
552 
553 	/*
554 	 * start up the file traversal code and format specific write
555 	 */
556 	if ((ftree_start() < 0) || ((*frmt->st_wr)() < 0))
557 		return 1;
558 	wrf = frmt->wr;
559 
560 	now = time(NULL);
561 
562 	/*
563 	 * When we are doing interactive rename, we store the mapping of names
564 	 * so we can fix up hard links files later in the archive.
565 	 */
566 	if (iflag && (name_start() < 0))
567 		return 1;
568 
569 	/*
570 	 * if this is not append, and there are no files, we do no write a trailer
571 	 */
572 	wr_one = is_app;
573 
574 	/*
575 	 * while there are files to archive, process them one at at time
576 	 */
577 	while (next_file(arcn) == 0) {
578 		/*
579 		 * check if this file meets user specified options match.
580 		 */
581 		if (sel_chk(arcn) != 0)
582 			continue;
583 		/*
584 		 * Here we handle the exclusion -X gnu style patterns which
585 		 * are implemented like a pattern list. We don't modify the
586 		 * name as this will be done below again, and we don't want
587 		 * to double modify it.
588 		 */
589 		if ((res = mod_name(arcn, 0)) < 0)
590 			break;
591 		if (res == 1)
592 			continue;
593 		fd = -1;
594 		if (uflag) {
595 			/*
596 			 * only archive if this file is newer than a file with
597 			 * the same name that is already stored on the archive
598 			 */
599 			if ((res = chk_ftime(arcn)) < 0)
600 				break;
601 			if (res > 0)
602 				continue;
603 		}
604 
605 		/*
606 		 * this file is considered selected now. see if this is a hard
607 		 * link to a file already stored
608 		 */
609 		ftree_sel(arcn);
610 		if (hlk && (chk_lnk(arcn) < 0))
611 			break;
612 
613 		if ((arcn->type == PAX_REG) || (arcn->type == PAX_HRG) ||
614 		    (arcn->type == PAX_CTG)) {
615 			/*
616 			 * we will have to read this file. by opening it now we
617 			 * can avoid writing a header to the archive for a file
618 			 * we were later unable to read (we also purge it from
619 			 * the link table).
620 			 */
621 			if ((fd = open(arcn->org_name, O_RDONLY, 0)) < 0) {
622 				syswarn(1, errno, "Unable to open %s to read",
623 					arcn->org_name);
624 				purg_lnk(arcn);
625 				continue;
626 			}
627 		}
628 
629 		/*
630 		 * Now modify the name as requested by the user
631 		 */
632 		if ((res = mod_name(arcn, RENM)) < 0) {
633 			/*
634 			 * name modification says to skip this file, close the
635 			 * file and purge link table entry
636 			 */
637 			rdfile_close(arcn, &fd);
638 			purg_lnk(arcn);
639 			break;
640 		}
641 
642 		if (arcn->name[0] == '/' && !check_Aflag()) {
643 			memmove(arcn->name, arcn->name + 1, strlen(arcn->name));
644 		}
645 
646 		if ((res > 0) || (docrc && (set_crc(arcn, fd) < 0))) {
647 			/*
648 			 * unable to obtain the crc we need, close the file,
649 			 * purge link table entry
650 			 */
651 			rdfile_close(arcn, &fd);
652 			purg_lnk(arcn);
653 			continue;
654 		}
655 
656 		if (vflag) {
657 			if (vflag > 1)
658 				ls_list(arcn, now, listf);
659 			else {
660 				(void)safe_print(arcn->name, listf);
661 				vfpart = 1;
662 			}
663 		}
664 		++flcnt;
665 
666 		/*
667 		 * looks safe to store the file, have the format specific
668 		 * routine write routine store the file header on the archive
669 		 */
670 		if ((res = (*wrf)(arcn)) < 0) {
671 			rdfile_close(arcn, &fd);
672 			break;
673 		}
674 		wr_one = 1;
675 		if (res > 0) {
676 			/*
677 			 * format write says no file data needs to be stored
678 			 * so we are done messing with this file
679 			 */
680 			if (vflag && vfpart) {
681 				(void)putc('\n', listf);
682 				vfpart = 0;
683 			}
684 			rdfile_close(arcn, &fd);
685 			continue;
686 		}
687 
688 		/*
689 		 * Add file data to the archive, quit on write error. if we
690 		 * cannot write the entire file contents to the archive we
691 		 * must pad the archive to replace the missing file data
692 		 * (otherwise during an extract the file header for the file
693 		 * which FOLLOWS this one will not be where we expect it to
694 		 * be).
695 		 */
696 		res = (*frmt->wr_data)(arcn, fd, &cnt);
697 		rdfile_close(arcn, &fd);
698 		if (vflag && vfpart) {
699 			(void)putc('\n', listf);
700 			vfpart = 0;
701 		}
702 		if (res < 0)
703 			break;
704 
705 		/*
706 		 * pad as required, cnt is number of bytes not written
707 		 */
708 		if (((cnt > 0) && (wr_skip(cnt) < 0)) ||
709 		    ((arcn->pad > 0) && (wr_skip(arcn->pad) < 0)))
710 			break;
711 	}
712 
713 	/*
714 	 * tell format to write trailer; pad to block boundary; reset directory
715 	 * mode/access times, and check if all patterns supplied by the user
716 	 * were matched. block off signals to avoid chance for multiple entry
717 	 * into the cleanup code
718 	 */
719 	if (wr_one) {
720 		(*frmt->end_wr)();
721 		wr_fin();
722 	}
723 	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
724 	ar_close();
725 	if (tflag)
726 		proc_dir();
727 	ftree_chk();
728 
729 	return 0;
730 }
731 
732 /*
733  * append()
734  *	Add file to previously written archive. Archive format specified by the
735  *	user must agree with archive. The archive is read first to collect
736  *	modification times (if -u) and locate the archive trailer. The archive
737  *	is positioned in front of the record with the trailer and wr_archive()
738  *	is called to add the new members.
739  *	PAX IMPLEMENTATION DETAIL NOTE:
740  *	-u is implemented by adding the new members to the end of the archive.
741  *	Care is taken so that these do not end up as links to the older
742  *	version of the same file already stored in the archive. It is expected
743  *	when extraction occurs these newer versions will over-write the older
744  *	ones stored "earlier" in the archive (this may be a bad assumption as
745  *	it depends on the implementation of the program doing the extraction).
746  *	It is really difficult to splice in members without either re-writing
747  *	the entire archive (from the point were the old version was), or having
748  *	assistance of the format specification in terms of a special update
749  *	header that invalidates a previous archive record. The posix spec left
750  *	the method used to implement -u unspecified. This pax is able to
751  *	over write existing files that it creates.
752  */
753 
754 int
755 append(void)
756 {
757 	ARCHD *arcn;
758 	int res;
759 	FSUB *orgfrmt;
760 	int udev;
761 	off_t tlen;
762 
763 	arcn = &archd;
764 	orgfrmt = frmt;
765 
766 	/*
767 	 * Do not allow an append operation if the actual archive is of a
768 	 * different format than the user specified format.
769 	 */
770 	if (get_arc() < 0)
771 		return 1;
772 	if ((orgfrmt != NULL) && (orgfrmt != frmt)) {
773 		tty_warn(1, "Cannot mix current archive format %s with %s",
774 		    frmt->name, orgfrmt->name);
775 		return 1;
776 	}
777 
778 	/*
779 	 * pass the format any options and start up format
780 	 */
781 	if (((*frmt->options)() < 0) || ((*frmt->st_rd)() < 0))
782 		return 1;
783 
784 	/*
785 	 * if we only are adding members that are newer, we need to save the
786 	 * mod times for all files we see.
787 	 */
788 	if (uflag && (ftime_start() < 0))
789 		return 1;
790 
791 	/*
792 	 * some archive formats encode hard links by recording the device and
793 	 * file serial number (inode) but copy the file anyway (multiple times)
794 	 * to the archive. When we append, we run the risk that newly added
795 	 * files may have the same device and inode numbers as those recorded
796 	 * on the archive but during a previous run. If this happens, when the
797 	 * archive is extracted we get INCORRECT hard links. We avoid this by
798 	 * remapping the device numbers so that newly added files will never
799 	 * use the same device number as one found on the archive. remapping
800 	 * allows new members to safely have links among themselves. remapping
801 	 * also avoids problems with file inode (serial number) truncations
802 	 * when the inode number is larger than storage space in the archive
803 	 * header. See the remap routines for more details.
804 	 */
805 	if ((udev = frmt->udev) && (dev_start() < 0))
806 		return 1;
807 
808 	/*
809 	 * reading the archive may take a long time. If verbose tell the user
810 	 */
811 	if (vflag || Vflag) {
812 		(void)fprintf(listf,
813 			"%s: Reading archive to position at the end...", argv0);
814 		vfpart = 1;
815 	}
816 
817 	/*
818 	 * step through the archive until the format says it is done
819 	 */
820 	while (next_head(arcn) == 0) {
821 		/*
822 		 * check if this file meets user specified options.
823 		 */
824 		if (sel_chk(arcn) != 0) {
825 			if (rd_skip(arcn->skip + arcn->pad) == 1)
826 				break;
827 			continue;
828 		}
829 
830 		if (uflag) {
831 			/*
832 			 * see if this is the newest version of this file has
833 			 * already been seen, if so skip.
834 			 */
835 			if ((res = chk_ftime(arcn)) < 0)
836 				break;
837 			if (res > 0) {
838 				if (rd_skip(arcn->skip + arcn->pad) == 1)
839 					break;
840 				continue;
841 			}
842 		}
843 
844 		/*
845 		 * Store this device number. Device numbers seen during the
846 		 * read phase of append will cause newly appended files with a
847 		 * device number seen in the old part of the archive to be
848 		 * remapped to an unused device number.
849 		 */
850 		if ((udev && (add_dev(arcn) < 0)) ||
851 		    (rd_skip(arcn->skip + arcn->pad) == 1))
852 			break;
853 	}
854 
855 	/*
856 	 * done, finish up read and get the number of bytes to back up so we
857 	 * can add new members. The format might have used the hard link table,
858 	 * purge it.
859 	 */
860 	tlen = (*frmt->end_rd)();
861 	lnk_end();
862 
863 	/*
864 	 * try to position for write, if this fails quit. if any error occurs,
865 	 * we will refuse to write
866 	 */
867 	if (appnd_start(tlen) < 0)
868 		return 1;
869 
870 	/*
871 	 * tell the user we are done reading.
872 	 */
873 	if ((vflag || Vflag) && vfpart) {
874 		(void)safe_print("done.\n", listf);
875 		vfpart = 0;
876 	}
877 
878 	/*
879 	 * go to the writing phase to add the new members
880 	 */
881 	res = wr_archive(arcn, 1);
882 	if (res == 1) {
883 		/*
884 		 * wr_archive failed in some way, but before any files were
885 		 * added. These are the only steps needed to cleanup (and
886 		 * not truncate the archive).
887 		 */
888 		wr_fin();
889 		(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
890 		ar_close();
891 	}
892 	return res;
893 }
894 
895 /*
896  * archive()
897  *	write a new archive
898  */
899 
900 int
901 archive(void)
902 {
903 
904 	/*
905 	 * if we only are adding members that are newer, we need to save the
906 	 * mod times for all files; set up for writing; pass the format any
907 	 * options write the archive
908 	 */
909 	if ((uflag && (ftime_start() < 0)) || (wr_start() < 0))
910 		return 1;
911 	if ((*frmt->options)() < 0)
912 		return 1;
913 
914 	return wr_archive(&archd, 0);
915 }
916 
917 /*
918  * copy()
919  *	copy files from one part of the file system to another. this does not
920  *	use any archive storage. The EFFECT OF THE COPY IS THE SAME as if an
921  *	archive was written and then extracted in the destination directory
922  *	(except the files are forced to be under the destination directory).
923  */
924 
925 int
926 copy(void)
927 {
928 	ARCHD *arcn;
929 	int res;
930 	int fddest;
931 	char *dest_pt;
932 	size_t dlen;
933 	size_t drem;
934 	int fdsrc = -1;
935 	struct stat sb;
936 	char dirbuf[PAXPATHLEN+1];
937 
938 	arcn = &archd;
939 	/*
940 	 * set up the destination dir path and make sure it is a directory. We
941 	 * make sure we have a trailing / on the destination
942 	 */
943 	dlen = strlcpy(dirbuf, dirptr, sizeof(dirbuf));
944 	if (dlen >= sizeof(dirbuf) ||
945 	    (dlen == sizeof(dirbuf) - 1 && dirbuf[dlen - 1] != '/')) {
946 		tty_warn(1, "directory name is too long %s", dirptr);
947 		return 1;
948 	}
949 	dest_pt = dirbuf + dlen;
950 	if (*(dest_pt-1) != '/') {
951 		*dest_pt++ = '/';
952 		++dlen;
953 	}
954 	*dest_pt = '\0';
955 	drem = PAXPATHLEN - dlen;
956 
957 	if (stat(dirptr, &sb) < 0) {
958 		syswarn(1, errno, "Cannot access destination directory %s",
959 			dirptr);
960 		return 1;
961 	}
962 	if (!S_ISDIR(sb.st_mode)) {
963 		tty_warn(1, "Destination is not a directory %s", dirptr);
964 		return 1;
965 	}
966 
967 	/*
968 	 * start up the hard link table; file traversal routines and the
969 	 * modification time and access mode database
970 	 */
971 	if ((lnk_start() < 0) || (ftree_start() < 0) || (dir_start() < 0))
972 		return 1;
973 
974 	/*
975 	 * When we are doing interactive rename, we store the mapping of names
976 	 * so we can fix up hard links files later in the archive.
977 	 */
978 	if (iflag && (name_start() < 0))
979 		return 1;
980 
981 	/*
982 	 * set up to cp file trees
983 	 */
984 	cp_start();
985 
986 	/*
987 	 * while there are files to archive, process them
988 	 */
989 	while (next_file(arcn) == 0) {
990 		fdsrc = -1;
991 
992 		/*
993 		 * check if this file meets user specified options
994 		 */
995 		if (sel_chk(arcn) != 0)
996 			continue;
997 
998 		/*
999 		 * if there is already a file in the destination directory with
1000 		 * the same name and it is newer, skip the one stored on the
1001 		 * archive.
1002 		 * NOTE: this test is done BEFORE name modifications as
1003 		 * specified by pax. this can be confusing to the user who
1004 		 * might expect the test to be done on an existing file AFTER
1005 		 * the name mod. In honesty the pax spec is probably flawed in
1006 		 * this respect
1007 		 */
1008 		if (uflag || Dflag) {
1009 			/*
1010 			 * create the destination name
1011 			 */
1012 			if (strlcpy(dest_pt, arcn->name + (*arcn->name == '/'),
1013 			    drem + 1) > drem) {
1014 				tty_warn(1, "Destination pathname too long %s",
1015 					arcn->name);
1016 				continue;
1017 			}
1018 
1019 			/*
1020 			 * if existing file is same age or newer skip
1021 			 */
1022 			res = lstat(dirbuf, &sb);
1023 			*dest_pt = '\0';
1024 
1025 			if (res == 0) {
1026 				if (uflag && Dflag) {
1027 					if ((arcn->sb.st_mtime<=sb.st_mtime) &&
1028 					    (arcn->sb.st_ctime<=sb.st_ctime))
1029 						continue;
1030 				} else if (Dflag) {
1031 					if (arcn->sb.st_ctime <= sb.st_ctime)
1032 						continue;
1033 				} else if (arcn->sb.st_mtime <= sb.st_mtime)
1034 					continue;
1035 			}
1036 		}
1037 
1038 		/*
1039 		 * this file is considered selected. See if this is a hard link
1040 		 * to a previous file; modify the name as requested by the
1041 		 * user; set the final destination.
1042 		 */
1043 		ftree_sel(arcn);
1044 		if ((chk_lnk(arcn) < 0) || ((res = mod_name(arcn, RENM)) < 0))
1045 			break;
1046 		if ((res > 0) || (set_dest(arcn, dirbuf, dlen) < 0)) {
1047 			/*
1048 			 * skip file, purge from link table
1049 			 */
1050 			purg_lnk(arcn);
1051 			continue;
1052 		}
1053 
1054 		/*
1055 		 * Non standard -Y and -Z flag. When the existing file is
1056 		 * same age or newer skip
1057 		 */
1058 		if ((Yflag || Zflag) && ((lstat(arcn->name, &sb) == 0))) {
1059 			if (Yflag && Zflag) {
1060 				if ((arcn->sb.st_mtime <= sb.st_mtime) &&
1061 				    (arcn->sb.st_ctime <= sb.st_ctime))
1062 					continue;
1063 			} else if (Yflag) {
1064 				if (arcn->sb.st_ctime <= sb.st_ctime)
1065 					continue;
1066 			} else if (arcn->sb.st_mtime <= sb.st_mtime)
1067 				continue;
1068 		}
1069 
1070 		if (vflag) {
1071 			(void)safe_print(arcn->name, listf);
1072 			vfpart = 1;
1073 		}
1074 		++flcnt;
1075 
1076 		/*
1077 		 * try to create a hard link to the src file if requested
1078 		 * but make sure we are not trying to overwrite ourselves.
1079 		 */
1080 		if (lflag)
1081 			res = cross_lnk(arcn);
1082 		else
1083 			res = chk_same(arcn);
1084 		if (res <= 0) {
1085 			if (vflag && vfpart) {
1086 				(void)putc('\n', listf);
1087 				vfpart = 0;
1088 			}
1089 			continue;
1090 		}
1091 
1092 		/*
1093 		 * have to create a new file
1094 		 */
1095 		if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) {
1096 			/*
1097 			 * create a link or special file
1098 			 */
1099 			if ((arcn->type == PAX_HLK) ||
1100 			    (arcn->type == PAX_HRG)) {
1101 				int payload;
1102 
1103 				res = lnk_creat(arcn, &payload);
1104 			} else {
1105 				res = node_creat(arcn);
1106 			}
1107 			if (res < 0)
1108 				purg_lnk(arcn);
1109 			if (vflag && vfpart) {
1110 				(void)putc('\n', listf);
1111 				vfpart = 0;
1112 			}
1113 			continue;
1114 		}
1115 
1116 		/*
1117 		 * have to copy a regular file to the destination directory.
1118 		 * first open source file and then create the destination file
1119 		 */
1120 		if ((fdsrc = open(arcn->org_name, O_RDONLY, 0)) < 0) {
1121 			syswarn(1, errno, "Unable to open %s to read",
1122 			    arcn->org_name);
1123 			purg_lnk(arcn);
1124 			continue;
1125 		}
1126 		if ((fddest = file_creat(arcn, 0)) < 0) {
1127 			rdfile_close(arcn, &fdsrc);
1128 			purg_lnk(arcn);
1129 			continue;
1130 		}
1131 
1132 		/*
1133 		 * copy source file data to the destination file.
1134 		 * if there was a failure, remove the temporary file
1135 		 * and leave any existing destination file unmodified.
1136 		 */
1137 		if (cp_file(arcn, fdsrc, fddest) < 0)
1138 			file_cleanup(arcn, fddest);
1139 		else
1140 			file_close(arcn, fddest);
1141 		rdfile_close(arcn, &fdsrc);
1142 
1143 		if (vflag && vfpart) {
1144 			(void)putc('\n', listf);
1145 			vfpart = 0;
1146 		}
1147 	}
1148 
1149 	/*
1150 	 * restore directory modes and times as required; make sure all
1151 	 * patterns were selected block off signals to avoid chance for
1152 	 * multiple entry into the cleanup code.
1153 	 */
1154 	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
1155 	ar_close();
1156 	proc_dir();
1157 	ftree_chk();
1158 
1159 	return 0;
1160 }
1161 
1162 /*
1163  * next_head()
1164  *	try to find a valid header in the archive. Uses format specific
1165  *	routines to extract the header and id the trailer. Trailers may be
1166  *	located within a valid header or in an invalid header (the location
1167  *	is format specific. The inhead field from the option table tells us
1168  *	where to look for the trailer).
1169  *	We keep reading (and resyncing) until we get enough contiguous data
1170  *	to check for a header. If we cannot find one, we shift by a byte
1171  *	add a new byte from the archive to the end of the buffer and try again.
1172  *	If we get a read error, we throw out what we have (as we must have
1173  *	contiguous data) and start over again.
1174  *	ASSUMED: headers fit within a BLKMULT header.
1175  * Return:
1176  *	0 if we got a header, -1 if we are unable to ever find another one
1177  *	(we reached the end of input, or we reached the limit on retries. see
1178  *	the specs for rd_wrbuf() for more details)
1179  */
1180 
1181 static int
1182 next_head(ARCHD *arcn)
1183 {
1184 	int ret;
1185 	char *hdend;
1186 	int res;
1187 	int shftsz;
1188 	int hsz;
1189 	int in_resync = 0;		/* set when we are in resync mode */
1190 	int cnt = 0;			/* counter for trailer function */
1191 	int first = 1;			/* on 1st read, EOF isn't premature. */
1192 
1193 	/*
1194 	 * set up initial conditions, we want a whole frmt->hsz block as we
1195 	 * have no data yet.
1196 	 */
1197 	res = hsz = frmt->hsz;
1198 	hdend = hdbuf;
1199 	shftsz = hsz - 1;
1200 	for(;;) {
1201 		/*
1202 		 * keep looping until we get a contiguous FULL buffer
1203 		 * (frmt->hsz is the proper size)
1204 		 */
1205 		for (;;) {
1206 			if ((ret = rd_wrbuf(hdend, res)) == res)
1207 				break;
1208 
1209 			/*
1210 			 * If we read 0 bytes (EOF) from an archive when we
1211 			 * expect to find a header, we have stepped upon
1212 			 * an archive without the customary block of zeroes
1213 			 * end marker.  It's just stupid to error out on
1214 			 * them, so exit gracefully.
1215 			 */
1216 			if (first && ret == 0)
1217 				return -1;
1218 			first = 0;
1219 
1220 			/*
1221 			 * some kind of archive read problem, try to resync the
1222 			 * storage device, better give the user the bad news.
1223 			 */
1224 			if ((ret == 0) || (rd_sync() < 0)) {
1225 				tty_warn(1,
1226 				    "Premature end of file on archive read");
1227 				return -1;
1228 			}
1229 			if (!in_resync) {
1230 				if (act == APPND) {
1231 					tty_warn(1,
1232 					  "Archive I/O error, cannot continue");
1233 					return -1;
1234 				}
1235 				tty_warn(1,
1236 				    "Archive I/O error. Trying to recover.");
1237 				++in_resync;
1238 			}
1239 
1240 			/*
1241 			 * oh well, throw it all out and start over
1242 			 */
1243 			res = hsz;
1244 			hdend = hdbuf;
1245 		}
1246 
1247 		/*
1248 		 * ok we have a contiguous buffer of the right size. Call the
1249 		 * format read routine. If this was not a valid header and this
1250 		 * format stores trailers outside of the header, call the
1251 		 * format specific trailer routine to check for a trailer. We
1252 		 * have to watch out that we do not mis-identify file data or
1253 		 * block padding as a header or trailer. Format specific
1254 		 * trailer functions must NOT check for the trailer while we
1255 		 * are running in resync mode. Some trailer functions may tell
1256 		 * us that this block cannot contain a valid header either, so
1257 		 * we then throw out the entire block and start over.
1258 		 */
1259 		if ((*frmt->rd)(arcn, hdbuf) == 0)
1260 			break;
1261 
1262 		if (!frmt->inhead) {
1263 			/*
1264 			 * this format has trailers outside of valid headers
1265 			 */
1266 			if ((ret = (*frmt->trail)(hdbuf,in_resync,&cnt)) == 0){
1267 				/*
1268 				 * valid trailer found, drain input as required
1269 				 */
1270 				ar_drain();
1271 				return -1;
1272 			}
1273 
1274 			if (ret == 1) {
1275 				/*
1276 				 * we are in resync and we were told to throw
1277 				 * the whole block out because none of the
1278 				 * bytes in this block can be used to form a
1279 				 * valid header
1280 				 */
1281 				res = hsz;
1282 				hdend = hdbuf;
1283 				continue;
1284 			}
1285 		}
1286 
1287 		/*
1288 		 * Brute force section.
1289 		 * not a valid header. We may be able to find a header yet. So
1290 		 * we shift over by one byte, and set up to read one byte at a
1291 		 * time from the archive and place it at the end of the buffer.
1292 		 * We will keep moving byte at a time until we find a header or
1293 		 * get a read error and have to start over.
1294 		 */
1295 		if (!in_resync) {
1296 			if (act == APPND) {
1297 				tty_warn(1,
1298 				    "Unable to append, archive header flaw");
1299 				return -1;
1300 			}
1301 			tty_warn(1,
1302 			    "Invalid header, starting valid header search.");
1303 			++in_resync;
1304 		}
1305 		memmove(hdbuf, hdbuf+1, shftsz);
1306 		res = 1;
1307 		hdend = hdbuf + shftsz;
1308 	}
1309 
1310 	/*
1311 	 * ok got a valid header, check for trailer if format encodes it in the
1312 	 * the header. NOTE: the parameters are different than trailer routines
1313 	 * which encode trailers outside of the header!
1314 	 */
1315 	if (frmt->inhead && ((*frmt->subtrail)(arcn) == 0)) {
1316 		/*
1317 		 * valid trailer found, drain input as required
1318 		 */
1319 		ar_drain();
1320 		return -1;
1321 	}
1322 
1323 	++flcnt;
1324 	return 0;
1325 }
1326 
1327 /*
1328  * get_arc()
1329  *	Figure out what format an archive is. Handles archive with flaws by
1330  *	brute force searches for a legal header in any supported format. The
1331  *	format id routines have to be careful to NOT mis-identify a format.
1332  *	ASSUMED: headers fit within a BLKMULT header.
1333  * Return:
1334  *	0 if archive found -1 otherwise
1335  */
1336 
1337 static int
1338 get_arc(void)
1339 {
1340 	int i;
1341 	int hdsz = 0;
1342 	int res;
1343 	int minhd = BLKMULT;
1344 	char *hdend;
1345 	int notice = 0;
1346 
1347 	/*
1348 	 * find the smallest header size in all archive formats and then set up
1349 	 * to read the archive.
1350 	 */
1351 	for (i = 0; ford[i] >= 0; ++i) {
1352 		if (fsub[ford[i]].hsz < minhd)
1353 			minhd = fsub[ford[i]].hsz;
1354 	}
1355 	if (rd_start() < 0)
1356 		return -1;
1357 	res = BLKMULT;
1358 	hdsz = 0;
1359 	hdend = hdbuf;
1360 	for(;;) {
1361 		for (;;) {
1362 			/*
1363 			 * fill the buffer with at least the smallest header
1364 			 */
1365 			i = rd_wrbuf(hdend, res);
1366 			if (i > 0)
1367 				hdsz += i;
1368 			if (hdsz >= minhd)
1369 				break;
1370 
1371 			/*
1372 			 * if we cannot recover from a read error quit
1373 			 */
1374 			if ((i == 0) || (rd_sync() < 0))
1375 				goto out;
1376 
1377 			/*
1378 			 * when we get an error none of the data we already
1379 			 * have can be used to create a legal header (we just
1380 			 * got an error in the middle), so we throw it all out
1381 			 * and refill the buffer with fresh data.
1382 			 */
1383 			res = BLKMULT;
1384 			hdsz = 0;
1385 			hdend = hdbuf;
1386 			if (!notice) {
1387 				if (act == APPND)
1388 					return -1;
1389 				tty_warn(1,
1390 				    "Cannot identify format. Searching...");
1391 				++notice;
1392 			}
1393 		}
1394 
1395 		/*
1396 		 * we have at least the size of the smallest header in any
1397 		 * archive format. Look to see if we have a match. The array
1398 		 * ford[] is used to specify the header id order to reduce the
1399 		 * chance of incorrectly id'ing a valid header (some formats
1400 		 * may be subsets of each other and the order would then be
1401 		 * important).
1402 		 */
1403 		for (i = 0; ford[i] >= 0; ++i) {
1404 			if ((*fsub[ford[i]].id)(hdbuf, hdsz) < 0)
1405 				continue;
1406 			frmt = &(fsub[ford[i]]);
1407 			/*
1408 			 * yuck, to avoid slow special case code in the extract
1409 			 * routines, just push this header back as if it was
1410 			 * not seen. We have left extra space at start of the
1411 			 * buffer for this purpose. This is a bit ugly, but
1412 			 * adding all the special case code is far worse.
1413 			 */
1414 			pback(hdbuf, hdsz);
1415 			return 0;
1416 		}
1417 
1418 		/*
1419 		 * We have a flawed archive, no match. we start searching, but
1420 		 * we never allow additions to flawed archives
1421 		 */
1422 		if (!notice) {
1423 			if (act == APPND)
1424 				return -1;
1425 			tty_warn(1, "Cannot identify format. Searching...");
1426 			++notice;
1427 		}
1428 
1429 		/*
1430 		 * brute force search for a header that we can id.
1431 		 * we shift through byte at a time. this is slow, but we cannot
1432 		 * determine the nature of the flaw in the archive in a
1433 		 * portable manner
1434 		 */
1435 		if (--hdsz > 0) {
1436 			memmove(hdbuf, hdbuf+1, hdsz);
1437 			res = BLKMULT - hdsz;
1438 			hdend = hdbuf + hdsz;
1439 		} else {
1440 			res = BLKMULT;
1441 			hdend = hdbuf;
1442 			hdsz = 0;
1443 		}
1444 	}
1445 
1446     out:
1447 	/*
1448 	 * we cannot find a header, bow, apologize and quit
1449 	 */
1450 	tty_warn(1, "Sorry, unable to determine archive format.");
1451 	return -1;
1452 }
1453