xref: /csrg-svn/bin/pax/pax.c (revision 57112)
1*57112Smuller /*-
2*57112Smuller  * Copyright (c) 1992 Keith Muller.
3*57112Smuller  * Copyright (c) 1992 The Regents of the University of California.
4*57112Smuller  * All rights reserved.
5*57112Smuller  *
6*57112Smuller  * This code is derived from software contributed to Berkeley by
7*57112Smuller  * Keith Muller of the University of California, San Diego.
8*57112Smuller  *
9*57112Smuller  * %sccs.include.redist.c%
10*57112Smuller  */
11*57112Smuller 
12*57112Smuller #ifndef lint
13*57112Smuller char copyright[] =
14*57112Smuller "@(#) Copyright (c) 1992 The Regents of the University of California.\n\
15*57112Smuller  All rights reserved.\n";
16*57112Smuller #endif /* not lint */
17*57112Smuller 
18*57112Smuller #ifndef lint
19*57112Smuller static char sccsid[] = "@(#)pax.c	1.1 (Berkeley) 12/13/92";
20*57112Smuller #endif /* not lint */
21*57112Smuller 
22*57112Smuller #include <stdio.h>
23*57112Smuller #include <sys/types.h>
24*57112Smuller #include <sys/param.h>
25*57112Smuller #include <sys/stat.h>
26*57112Smuller #include <sys/time.h>
27*57112Smuller #include <sys/resource.h>
28*57112Smuller #include <signal.h>
29*57112Smuller #include <unistd.h>
30*57112Smuller #include <stdlib.h>
31*57112Smuller #include <errno.h>
32*57112Smuller #include "pax.h"
33*57112Smuller #include "extern.h"
34*57112Smuller static int gen_init __P((void));
35*57112Smuller 
36*57112Smuller /*
37*57112Smuller  * BSD PAX main routines, general globals and some simple start up routines
38*57112Smuller  */
39*57112Smuller 
40*57112Smuller /*
41*57112Smuller  * Variables that can be accessed by any routine within pax
42*57112Smuller  */
43*57112Smuller int	act = DEFOP;		/* read/write/append/copy */
44*57112Smuller FSUB	*frmt = NULL;		/* archive format type */
45*57112Smuller int	cflag;			/* match all EXCEPT pattern/file */
46*57112Smuller int	dflag;			/* directory member match only  */
47*57112Smuller int	iflag;			/* interactive file/archive rename */
48*57112Smuller int	kflag;			/* do not overwrite existing files */
49*57112Smuller int	lflag;			/* use hard links when possible */
50*57112Smuller int	nflag;			/* select first archive member match */
51*57112Smuller int	tflag;			/* restore access time after read */
52*57112Smuller int	uflag;			/* ignore older modification time files */
53*57112Smuller int	vflag;			/* produce verbose output */
54*57112Smuller int	Hflag;			/* follow command line symlinks (write only) */
55*57112Smuller int	Lflag;			/* follow symlinks when writing archive */
56*57112Smuller int	Xflag;			/* archive files with same device id only */
57*57112Smuller int	Zflag;			/* move file time check to after name mode */
58*57112Smuller int	vfpart;			/* is partial verbose output in progress */
59*57112Smuller int	patime = 1;		/* preserve file access time */
60*57112Smuller int	pmtime = 1;		/* preserve file modification times */
61*57112Smuller int	pmode;			/* preserve file mode bits */
62*57112Smuller int	pids;			/* preserve file uid/gid */
63*57112Smuller int	exit_val;		/* exit value */
64*57112Smuller int	docrc;			/* check/create file crc */
65*57112Smuller char	*dirptr;		/* destination dir in a copy */
66*57112Smuller char	*ltmfrmt;		/* -v locale time format (if any) */
67*57112Smuller sigset_t s_mask;		/* signal mask for cleanup critical sect */
68*57112Smuller 
69*57112Smuller /*
70*57112Smuller  *	PAX - Portable Archive Interchange
71*57112Smuller  *
72*57112Smuller  * 	A utility to read, write, and write lists of the members of archive
73*57112Smuller  *	files and copy directory hierarchies. A variety of archive formats
74*57112Smuller  *	are supported (some are described in POSIX 1003.1 10.1):
75*57112Smuller  *
76*57112Smuller  *		ustar - 10.1.1 extended tar interchange format
77*57112Smuller  *		cpio  - 10.1.2 extended cpio interchange format
78*57112Smuller  *		tar - old BSD 4.3 tar format
79*57112Smuller  *		binary cpio - old cpio with binary header format
80*57112Smuller  *		sysVR4 cpio -  with and without CRC
81*57112Smuller  *
82*57112Smuller  * This version is a superset of IEEE Std 1003.2b-d3
83*57112Smuller  *
84*57112Smuller  * Summary of Extensions to the IEEE Standard:
85*57112Smuller  *
86*57112Smuller  * 1	Read enhancements
87*57112Smuller  * 1.1	Operations which read archives will continue to operate even when
88*57112Smuller  *	processing archives which may be damaged, truncated, or fail to meet
89*57112Smuller  *	format specs in several different ways. Damaged sections of archives
90*57112Smuller  *	are detected and avoided if possible. Attempts will be made to resync
91*57112Smuller  *	archive read operations even with badly damaged media.
92*57112Smuller  * 1.2	Blocksize requirements are not strictly enforced on archive read.
93*57112Smuller  *	Tapes which have variable sized records can be read without errors.
94*57112Smuller  * 1.3	The user can specify via the non-standard option flag -E if error
95*57112Smuller  *	resync operation should stop on a media error, try a specified number
96*57112Smuller  *	of times to correct, or try to correct forever.
97*57112Smuller  * 1.4	Sparse files (lseek holes) stored on the archive (but stored with blocks
98*57112Smuller  *	of all zeros) will be restored with holes appropriate for the target
99*57112Smuller  *	filesystem.
100*57112Smuller  * 1.5	The user is notified whenever something is found during archive
101*57112Smuller  *	read operations which violates spec (but the read will continue).
102*57112Smuller  * 1.6	Multiple archive volumes can be read and may span over different
103*57112Smuller  *	archive devices
104*57112Smuller  * 1.7	Rigidly restores all file attributes exactly as they are stored on the
105*57112Smuller  *	archive.
106*57112Smuller  * 1.8	Modification time ranges can be specified via multiple -T options.
107*57112Smuller  *	These allow a user to select files whose modification time lies within a
108*57112Smuller  *	specific time range.
109*57112Smuller  * 1.9	Files can be selected based on owner (user name or uid) via one or more
110*57112Smuller  *	-U options.
111*57112Smuller  * 1.10	Files can be selected based on group (group name or gid) via one or
112*57112Smuller  *	more -G options.
113*57112Smuller  * 1.11	File modification time can be checked against existing file after
114*57112Smuller  *	name modification (-Z)
115*57112Smuller  *
116*57112Smuller  * 2	Write enhancements
117*57112Smuller  * 2.1	Write operation will stop instead of allowing a user to create a flawed
118*57112Smuller  *	archive (due to any problem).
119*57112Smuller  * 2.2	Archives written by pax are forced to strictly conform to both the
120*57112Smuller  *	pax and the specific archive format specifications.
121*57112Smuller  * 2.3	Blocking size and format is rigidly enforced on writes.
122*57112Smuller  * 2.4	Formats which may exhibit header overflow problems (they have fields
123*57112Smuller  *	too small for large file systems, such as inode number storage), use
124*57112Smuller  *	routines designed to repair this problem. These techniques still
125*57112Smuller  *	conform to both pax and format specifications, but no longer truncate
126*57112Smuller  *	these fields. This removes any restrictions on using these archive
127*57112Smuller  *	formats on large file systems.
128*57112Smuller  * 2.5	Multiple archive volumes can be written and may span over different
129*57112Smuller  *	archive devices
130*57112Smuller  * 2.6	An archive volume record limit allows the user to specify the number
131*57112Smuller  *	of bytes stored on an archive volume. When reached the user is
132*57112Smuller  *	prompted for the next archive volume. This is specified with the
133*57112Smuller  *	non-standard -B flag. The limit is rounded up to the next blocksize.
134*57112Smuller  * 2.7	All archive padding during write uses zero filled sections. This makes
135*57112Smuller  *	it much easier to pull data out of a flawed archive during read
136*57112Smuller  *	operations.
137*57112Smuller  * 2.8	Access time reset with the -t applies to all file nodes (including
138*57112Smuller  *	directories).
139*57112Smuller  * 2.9	Symbolic links can be followed with -L (optional in the spec).
140*57112Smuller  * 2.10	Modification time ranges can be specified via multiple -T options. These
141*57112Smuller  *	allow a user to select files whose modification time lies within a
142*57112Smuller  *	specific time range.
143*57112Smuller  * 2.11	Files can be selected based on owner (user name or uid) via one or more
144*57112Smuller  *	-U options.
145*57112Smuller  * 2.12	Files can be selected based on group (group name or gid) via one or
146*57112Smuller  *	more -G options.
147*57112Smuller  * 2.13	Symlinks which appear on the command line can be followed (without
148*57112Smuller  *	following other symlinks; -H flag)
149*57112Smuller  *
150*57112Smuller  * 3	Copy enhancements
151*57112Smuller  * 3.1	Sparse files (lseek holes) can be copied without expanding the holes
152*57112Smuller  *	into zero filled blocks. The file copy is created with holes which are
153*57112Smuller  *	appropriate for the target filesystem
154*57112Smuller  * 3.2	Access time as well as modification time on copied file trees can be
155*57112Smuller  *	preserved with the appropriate -p options.
156*57112Smuller  * 3.3	Access time reset with the -t applies to all file nodes (including
157*57112Smuller  *	directories).
158*57112Smuller  * 3.4	Symbolic links can be followed with -L (optional in the spec).
159*57112Smuller  * 3.5	Modification time ranges can be specified via multiple -T options. These
160*57112Smuller  *	allow a user to select files whose modification time lies within a
161*57112Smuller  *	specific time range.
162*57112Smuller  * 3.6	Files can be selected based on owner (user name or uid) via one or more
163*57112Smuller  *	-U options.
164*57112Smuller  * 3.7	Files can be selected based on group (group name or gid) via one or
165*57112Smuller  *	more -G options.
166*57112Smuller  * 3.8	Symlinks which appear on the command line can be followed (without
167*57112Smuller  *	following other symlinks; -H flag)
168*57112Smuller  * 3.9	File modification time can be checked against existing file after
169*57112Smuller  *	name modification (-Z)
170*57112Smuller  *
171*57112Smuller  * 4	General enhancements
172*57112Smuller  * 4.1	Internal structure is designed to isolate format dependent and
173*57112Smuller  *	independent functions. Formats are selected via a format driver table.
174*57112Smuller  *	This encourages the addition of new archive formats by only having to
175*57112Smuller  *	write those routines which id, read and write the archive header.
176*57112Smuller  */
177*57112Smuller 
178*57112Smuller #if __STDC__
179*57112Smuller int
180*57112Smuller main(int argc, char **argv)
181*57112Smuller #else
182*57112Smuller int
183*57112Smuller main(argc, argv)
184*57112Smuller 	int argc;
185*57112Smuller 	char **argv;
186*57112Smuller #endif
187*57112Smuller {
188*57112Smuller 	/*
189*57112Smuller 	 * parse options, set up and operate as specified by the user.
190*57112Smuller 	 * any operational flaw will set exit_val to none zero
191*57112Smuller 	 */
192*57112Smuller 	options(argc, argv);
193*57112Smuller         if ((gen_init() < 0) || (tty_init() < 0))
194*57112Smuller 		return(exit_val);
195*57112Smuller 
196*57112Smuller 	/*
197*57112Smuller 	 * select a primary operation mode
198*57112Smuller 	 */
199*57112Smuller 	switch(act) {
200*57112Smuller 	case EXTRACT:
201*57112Smuller 		extract();
202*57112Smuller 		break;
203*57112Smuller 	case ARCHIVE:
204*57112Smuller 		archive();
205*57112Smuller 		break;
206*57112Smuller 	case APPND:
207*57112Smuller 		append();
208*57112Smuller 		break;
209*57112Smuller 	case COPY:
210*57112Smuller 		copy();
211*57112Smuller 		break;
212*57112Smuller 	default:
213*57112Smuller 	case LIST:
214*57112Smuller 		list();
215*57112Smuller 		break;
216*57112Smuller 	}
217*57112Smuller 	return(exit_val);
218*57112Smuller }
219*57112Smuller 
/*
 * usage()
 *	print the usage summary to the user on stderr
 * Return:
 *	never, exit(1) is called once the summary has been written
 */

#if __STDC__
void
usage(void)
#else
void
usage()
#endif
{
	/*
	 * The summary is kept as a NULL terminated list of fragments which
	 * are written back to back, so each fragment must carry its own
	 * leading/trailing white space and newlines.
	 */
	static char *summary[] = {
		"usage: pax [-cdnv] [-E limit] [-f archive]",
		" [-s replstr] ... [-U user] ...",
		"\n           [-G group] ... ",
		"[-T [from_date][,to_date]] ...  [pattern ...]\n",
		"       pax -r [-cdiknuvZ] [-E limit] ",
		"[-f archive] [-o options] ... \n",
		"           [-p string] ... [-s replstr] ... ",
		" [-U user] ... [-G group] ...",
		"\n           [-T [from_date][,to_date]] ... ",
		" [pattern ...]\n",
		"       pax -w [-dituvHLX] [-b blocksize] ",
		"[ [-a] [-f archive] ] [-x format] \n",
		"           [-B bytes] [-s replstr] ... ",
		"[-o options] ... [-U user] ...",
		"\n           [-G group] ... ",
		"[-T [from_date][,to_date]] ... [file ...]\n",
		/*
		 * trailing blank keeps the flag cluster from running into
		 * the "[-p string]" that follows it
		 */
		"       pax -r -w [-diklntuvHLXZ] ",
		"[-p string] ... [-s replstr] ... [-U user] ...",
		"\n           [-G group] ... ",
		"[-T [from_date][,to_date]] ... ",
		"[file ...] directory\n",
		NULL
	};
	char **frag;

	for (frag = summary; *frag != NULL; ++frag)
		(void)fputs(*frag, stderr);
	exit(1);
}
256*57112Smuller 
257*57112Smuller /*
258*57112Smuller  * sig_cleanup()
259*57112Smuller  *	when interrupted we try to do whatever delayed processing we can.
260*57112Smuller  *	This is not critical, but we really ought to limit our damage when we
261*57112Smuller  *	are aborted by the user.
262*57112Smuller  * Return:
263*57112Smuller  *	never....
264*57112Smuller  */
265*57112Smuller 
266*57112Smuller #if __STDC__
267*57112Smuller void
268*57112Smuller sig_cleanup(int which_sig)
269*57112Smuller #else
270*57112Smuller void
271*57112Smuller sig_cleanup(which_sig)
272*57112Smuller 	int which_sig;
273*57112Smuller #endif
274*57112Smuller {
275*57112Smuller 	/*
276*57112Smuller 	 * restore modes and times for any dirs we may have created
277*57112Smuller 	 * or any dirs we may have read
278*57112Smuller 	 */
279*57112Smuller 	vfpart = 1;
280*57112Smuller 	if (which_sig == SIGXCPU)
281*57112Smuller 		warn(0, "Cpu time limit reached, cleaning up.");
282*57112Smuller 	else
283*57112Smuller 		warn(0, "Signal caught, cleaning up.");
284*57112Smuller 	ar_close();
285*57112Smuller 	proc_dir();
286*57112Smuller 	if (tflag)
287*57112Smuller 		atdir_end();
288*57112Smuller 	exit(1);
289*57112Smuller }
290*57112Smuller 
291*57112Smuller /*
292*57112Smuller  * gen_init()
293*57112Smuller  *	general setup routines. Not all are required, but they really help
294*57112Smuller  *	when dealing with a medium to large sized archives.
295*57112Smuller  */
296*57112Smuller 
297*57112Smuller #if __STDC__
298*57112Smuller static int
299*57112Smuller gen_init(void)
300*57112Smuller #else
301*57112Smuller static int
302*57112Smuller gen_init()
303*57112Smuller #endif
304*57112Smuller {
305*57112Smuller 	struct rlimit reslimit;
306*57112Smuller 	struct sigaction n_hand;
307*57112Smuller 	struct sigaction o_hand;
308*57112Smuller 
309*57112Smuller 	/*
310*57112Smuller 	 * Really needed to handle large archives. We can run out of memory for
311*57112Smuller 	 * internal tables really fast when we have a whole lot of files...
312*57112Smuller 	 */
313*57112Smuller 	if (getrlimit(RLIMIT_DATA , &reslimit) == 0){
314*57112Smuller 		reslimit.rlim_cur = reslimit.rlim_max;
315*57112Smuller 		(void)setrlimit(RLIMIT_DATA , &reslimit);
316*57112Smuller 	}
317*57112Smuller 
318*57112Smuller 	/*
319*57112Smuller 	 * should file size limits be waived? if the os limits us, this is
320*57112Smuller 	 * needed if we want to write a large archive
321*57112Smuller 	 */
322*57112Smuller 	if (getrlimit(RLIMIT_FSIZE , &reslimit) == 0){
323*57112Smuller 		reslimit.rlim_cur = reslimit.rlim_max;
324*57112Smuller 		(void)setrlimit(RLIMIT_FSIZE , &reslimit);
325*57112Smuller 	}
326*57112Smuller 
327*57112Smuller 	/*
328*57112Smuller 	 * increase the size the stack can grow to
329*57112Smuller 	 */
330*57112Smuller 	if (getrlimit(RLIMIT_STACK , &reslimit) == 0){
331*57112Smuller 		reslimit.rlim_cur = reslimit.rlim_max;
332*57112Smuller 		(void)setrlimit(RLIMIT_STACK , &reslimit);
333*57112Smuller 	}
334*57112Smuller 
335*57112Smuller 	/*
336*57112Smuller 	 * not really needed, but doesn't hurt
337*57112Smuller 	 */
338*57112Smuller 	if (getrlimit(RLIMIT_RSS , &reslimit) == 0){
339*57112Smuller 		reslimit.rlim_cur = reslimit.rlim_max;
340*57112Smuller 		(void)setrlimit(RLIMIT_RSS , &reslimit);
341*57112Smuller 	}
342*57112Smuller 
343*57112Smuller 	/*
344*57112Smuller 	 * Handle posix locale
345*57112Smuller 	 *
346*57112Smuller 	 * set user defines time printing format for -v option
347*57112Smuller 	 */
348*57112Smuller 	ltmfrmt = getenv("LC_TIME");
349*57112Smuller 
350*57112Smuller 	/*
351*57112Smuller 	 * signal handling to reset stored directory times and modes. Since
352*57112Smuller 	 * we deal with broken pipes via failed writes we ignore it. We also
353*57112Smuller 	 * deal with exceeed file size limit with failed writes. Cpu time
354*57112Smuller 	 * limits is caught and a cleanup is forced. All other "user"
355*57112Smuller 	 * generated signals are handled.
356*57112Smuller 	 */
357*57112Smuller 	if ((sigemptyset(&s_mask) < 0) || (sigaddset(&s_mask, SIGTERM) < 0) ||
358*57112Smuller 	    (sigaddset(&s_mask,SIGINT) < 0)||(sigaddset(&s_mask,SIGHUP) < 0) ||
359*57112Smuller 	    (sigaddset(&s_mask,SIGPIPE) < 0)||(sigaddset(&s_mask,SIGQUIT)<0) ||
360*57112Smuller 	    (sigaddset(&s_mask,SIGXCPU) < 0)||(sigaddset(&s_mask,SIGXFSZ)<0)) {
361*57112Smuller 		warn(1, "Unable to set up signal mask");
362*57112Smuller 		return(-1);
363*57112Smuller 	}
364*57112Smuller 	n_hand.sa_mask = s_mask;
365*57112Smuller 	n_hand.sa_flags = 0;
366*57112Smuller 	n_hand.sa_handler = sig_cleanup;
367*57112Smuller 
368*57112Smuller 	if ((sigaction(SIGHUP, &n_hand, &o_hand) < 0) &&
369*57112Smuller 	    (o_hand.sa_handler == SIG_IGN) &&
370*57112Smuller 	    (sigaction(SIGHUP, &o_hand, &o_hand) < 0))
371*57112Smuller 		goto out;
372*57112Smuller 
373*57112Smuller 	if ((sigaction(SIGTERM, &n_hand, &o_hand) < 0) &&
374*57112Smuller 	    (o_hand.sa_handler == SIG_IGN) &&
375*57112Smuller 	    (sigaction(SIGTERM, &o_hand, &o_hand) < 0))
376*57112Smuller 		goto out;
377*57112Smuller 
378*57112Smuller 	if ((sigaction(SIGINT, &n_hand, &o_hand) < 0) &&
379*57112Smuller 	    (o_hand.sa_handler == SIG_IGN) &&
380*57112Smuller 	    (sigaction(SIGINT, &o_hand, &o_hand) < 0))
381*57112Smuller 		goto out;
382*57112Smuller 
383*57112Smuller 	if ((sigaction(SIGQUIT, &n_hand, &o_hand) < 0) &&
384*57112Smuller 	    (o_hand.sa_handler == SIG_IGN) &&
385*57112Smuller 	    (sigaction(SIGQUIT, &o_hand, &o_hand) < 0))
386*57112Smuller 		goto out;
387*57112Smuller 
388*57112Smuller 	if ((sigaction(SIGXCPU, &n_hand, &o_hand) < 0) &&
389*57112Smuller 	    (o_hand.sa_handler == SIG_IGN) &&
390*57112Smuller 	    (sigaction(SIGXCPU, &o_hand, &o_hand) < 0))
391*57112Smuller 		goto out;
392*57112Smuller 
393*57112Smuller 	n_hand.sa_handler = SIG_IGN;
394*57112Smuller 	if ((sigaction(SIGPIPE, &n_hand, &o_hand) < 0) ||
395*57112Smuller 	    (sigaction(SIGXFSZ, &n_hand, &o_hand) < 0))
396*57112Smuller 		goto out;
397*57112Smuller 	return(0);
398*57112Smuller 
399*57112Smuller     out:
400*57112Smuller 	syswarn(1, errno, "Unable to set up signal handler");
401*57112Smuller 	return(-1);
402*57112Smuller }
403