xref: /csrg-svn/bin/pax/pax.c (revision 57112)
1 /*-
2  * Copyright (c) 1992 Keith Muller.
3  * Copyright (c) 1992 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Keith Muller of the University of California, San Diego.
8  *
9  * %sccs.include.redist.c%
10  */
11 
12 #ifndef lint
13 char copyright[] =
14 "@(#) Copyright (c) 1992 The Regents of the University of California.\n\
15  All rights reserved.\n";
16 #endif /* not lint */
17 
18 #ifndef lint
19 static char sccsid[] = "@(#)pax.c	1.1 (Berkeley) 12/13/92";
20 #endif /* not lint */
21 
22 #include <stdio.h>
23 #include <sys/types.h>
24 #include <sys/param.h>
25 #include <sys/stat.h>
26 #include <sys/time.h>
27 #include <sys/resource.h>
28 #include <signal.h>
29 #include <unistd.h>
30 #include <stdlib.h>
31 #include <errno.h>
32 #include "pax.h"
33 #include "extern.h"
34 static int gen_init __P((void));
35 
36 /*
37  * BSD PAX main routines, general globals and some simple start up routines
38  */
39 
40 /*
41  * Variables that can be accessed by any routine within pax
42  */
43 int	act = DEFOP;		/* read/write/append/copy */
44 FSUB	*frmt = NULL;		/* archive format type */
45 int	cflag;			/* match all EXCEPT pattern/file */
46 int	dflag;			/* directory member match only  */
47 int	iflag;			/* interactive file/archive rename */
48 int	kflag;			/* do not overwrite existing files */
49 int	lflag;			/* use hard links when possible */
50 int	nflag;			/* select first archive member match */
51 int	tflag;			/* restore access time after read */
52 int	uflag;			/* ignore older modification time files */
53 int	vflag;			/* produce verbose output */
54 int	Hflag;			/* follow command line symlinks (write only) */
55 int	Lflag;			/* follow symlinks when writing archive */
56 int	Xflag;			/* archive files with same device id only */
57 int	Zflag;			/* move file time check to after name mode */
58 int	vfpart;			/* is partial verbose output in progress */
59 int	patime = 1;		/* preserve file access time */
60 int	pmtime = 1;		/* preserve file modification times */
61 int	pmode;			/* preserve file mode bits */
62 int	pids;			/* preserve file uid/gid */
63 int	exit_val;		/* exit value */
64 int	docrc;			/* check/create file crc */
65 char	*dirptr;		/* destination dir in a copy */
66 char	*ltmfrmt;		/* -v locale time format (if any) */
67 sigset_t s_mask;		/* signal mask for cleanup critical sect */
68 
69 /*
70  *	PAX - Portable Archive Interchange
71  *
72  * 	A utility to read, write, and write lists of the members of archive
73  *	files and copy directory hierarchies. A variety of archive formats
74  *	are supported (some are described in POSIX 1003.1 10.1):
75  *
76  *		ustar - 10.1.1 extended tar interchange format
77  *		cpio  - 10.1.2 extended cpio interchange format
78  *		tar - old BSD 4.3 tar format
79  *		binary cpio - old cpio with binary header format
80  *		sysVR4 cpio -  with and without CRC
81  *
82  * This version is a superset of IEEE Std 1003.2b-d3
83  *
84  * Summary of Extensions to the IEEE Standard:
85  *
86  * 1	Read enhancements
87  * 1.1	Operations which read archives will continue to operate even when
88  *	processing archives which may be damaged, truncated, or fail to meet
89  *	format specs in several different ways. Damaged sections of archives
90  *	are detected and avoided if possible. Attempts will be made to resync
91  *	archive read operations even with badly damaged media.
92  * 1.2	Blocksize requirements are not strictly enforced on archive read.
93  *	Tapes which have variable sized records can be read without errors.
94  * 1.3	The user can specify via the non-standard option flag -E if error
95  *	resync operation should stop on a media error, try a specified number
96  *	of times to correct, or try to correct forever.
 * 1.4	Sparse files (lseek holes) stored on the archive (but stored with blocks
 *	of all zeros) will be restored with holes appropriate for the target
 *	filesystem
100  * 1.5	The user is notified whenever something is found during archive
101  *	read operations which violates spec (but the read will continue).
102  * 1.6	Multiple archive volumes can be read and may span over different
103  *	archive devices
104  * 1.7	Rigidly restores all file attributes exactly as they are stored on the
105  *	archive.
106  * 1.8	Modification time ranges can be specified via multiple -T options.
107  *	These allow a user to select files whose modification time lies within a
108  *	specific time range.
109  * 1.9	Files can be selected based on owner (user name or uid) via one or more
110  *	-U options.
 * 1.10	Files can be selected based on group (group name or gid) via one or
 *	more -G options.
 * 1.11	File modification time can be checked against existing file after
 *	name modification (-Z)
115  *
116  * 2	Write enhancements
 * 2.1	Write operation will stop instead of allowing a user to create a
 *	flawed archive (due to any problem).
 * 2.2	Archives written by pax are forced to strictly conform to both the
 *	pax and the specific archive format specifications.
121  * 2.3	Blocking size and format is rigidly enforced on writes.
122  * 2.4	Formats which may exhibit header overflow problems (they have fields
123  *	too small for large file systems, such as inode number storage), use
124  *	routines designed to repair this problem. These techniques still
125  *	conform to both pax and format specifications, but no longer truncate
126  *	these fields. This removes any restrictions on using these archive
127  *	formats on large file systems.
128  * 2.5	Multiple archive volumes can be written and may span over different
129  *	archive devices
 * 2.6	An archive volume record limit allows the user to specify the number
 *	of bytes stored on an archive volume. When reached the user is
 *	prompted for the next archive volume. This is specified with the
 *	non-standard -B flag. The limit is rounded up to the next blocksize.
 * 2.7	All archive padding during write uses zero filled sections. This makes
 *	it much easier to pull data out of a flawed archive during read
 *	operations.
137  * 2.8	Access time reset with the -t applies to all file nodes (including
138  *	directories).
139  * 2.9	Symbolic links can be followed with -L (optional in the spec).
140  * 2.10	Modification time ranges can be specified via multiple -T options. These
141  *	allow a user to select files whose modification time lies within a
142  *	specific time range.
143  * 2.11	Files can be selected based on owner (user name or uid) via one or more
144  *	-U options.
 * 2.12	Files can be selected based on group (group name or gid) via one or
 *	more -G options.
147  * 2.13	Symlinks which appear on the command line can be followed (without
148  *	following other symlinks; -H flag)
149  *
150  * 3	Copy enhancements
151  * 3.1	Sparse files (lseek holes) can be copied without expanding the holes
152  *	into zero filled blocks. The file copy is created with holes which are
153  *	appropriate for the target filesystem
154  * 3.2	Access time as well as modification time on copied file trees can be
155  *	preserved with the appropriate -p options.
156  * 3.3	Access time reset with the -t applies to all file nodes (including
157  *	directories).
158  * 3.4	Symbolic links can be followed with -L (optional in the spec).
159  * 3.5	Modification time ranges can be specified via multiple -T options. These
160  *	allow a user to select files whose modification time lies within a
161  *	specific time range.
162  * 3.6	Files can be selected based on owner (user name or uid) via one or more
163  *	-U options.
 * 3.7	Files can be selected based on group (group name or gid) via one or
 *	more -G options.
166  * 3.8	Symlinks which appear on the command line can be followed (without
167  *	following other symlinks; -H flag)
 * 3.9	File modification time can be checked against existing file after
 *	name modification (-Z)
170  *
171  * 4	General enhancements
172  * 4.1	Internal structure is designed to isolate format dependent and
173  *	independent functions. Formats are selected via a format driver table.
174  *	This encourages the addition of new archive formats by only having to
175  *	write those routines which id, read and write the archive header.
176  */
177 
178 #if __STDC__
179 int
180 main(int argc, char **argv)
181 #else
182 int
183 main(argc, argv)
184 	int argc;
185 	char **argv;
186 #endif
187 {
188 	/*
189 	 * parse options, set up and operate as specified by the user.
190 	 * any operational flaw will set exit_val to none zero
191 	 */
192 	options(argc, argv);
193         if ((gen_init() < 0) || (tty_init() < 0))
194 		return(exit_val);
195 
196 	/*
197 	 * select a primary operation mode
198 	 */
199 	switch(act) {
200 	case EXTRACT:
201 		extract();
202 		break;
203 	case ARCHIVE:
204 		archive();
205 		break;
206 	case APPND:
207 		append();
208 		break;
209 	case COPY:
210 		copy();
211 		break;
212 	default:
213 	case LIST:
214 		list();
215 		break;
216 	}
217 	return(exit_val);
218 }
219 
/*
 * usage()
 *	print the usage summary (one entry per invocation form of pax) to
 *	stderr.
 * Return:
 *	never, the process exits with status 1
 */

#if __STDC__
void
usage(void)
#else
void
usage()
#endif
{
	/*
	 * The summary is kept as a NULL terminated table of fragments which
	 * are written out back to back. Each fragment has to carry its own
	 * separating blank or newline.
	 */
	static char *frag[] = {
		"usage: pax [-cdnv] [-E limit] [-f archive]",
		" [-s replstr] ... [-U user] ...",
		"\n           [-G group] ... ",
		"[-T [from_date][,to_date]] ...  [pattern ...]\n",
		"       pax -r [-cdiknuvZ] [-E limit] ",
		"[-f archive] [-o options] ... \n",
		"           [-p string] ... [-s replstr] ... ",
		" [-U user] ... [-G group] ...",
		"\n           [-T [from_date][,to_date]] ... ",
		" [pattern ...]\n",
		"       pax -w [-dituvHLX] [-b blocksize] ",
		"[ [-a] [-f archive] ] [-x format] \n",
		"           [-B bytes] [-s replstr] ... ",
		"[-o options] ... [-U user] ...",
		"\n           [-G group] ... ",
		"[-T [from_date][,to_date]] ... [file ...]\n",
		/*
		 * trailing blank keeps the flag list apart from the
		 * [-p string] which follows it on the same output line
		 */
		"       pax -r -w [-diklntuvHLXZ] ",
		"[-p string] ... [-s replstr] ... [-U user] ...",
		"\n           [-G group] ... ",
		"[-T [from_date][,to_date]] ... ",
		"[file ...] directory\n",
		NULL
	};
	char **pt;

	for (pt = frag; *pt != NULL; ++pt)
		(void)fputs(*pt, stderr);
	exit(1);
}
256 
257 /*
258  * sig_cleanup()
259  *	when interrupted we try to do whatever delayed processing we can.
260  *	This is not critical, but we really ought to limit our damage when we
261  *	are aborted by the user.
262  * Return:
263  *	never....
264  */
265 
266 #if __STDC__
267 void
268 sig_cleanup(int which_sig)
269 #else
270 void
271 sig_cleanup(which_sig)
272 	int which_sig;
273 #endif
274 {
275 	/*
276 	 * restore modes and times for any dirs we may have created
277 	 * or any dirs we may have read
278 	 */
279 	vfpart = 1;
280 	if (which_sig == SIGXCPU)
281 		warn(0, "Cpu time limit reached, cleaning up.");
282 	else
283 		warn(0, "Signal caught, cleaning up.");
284 	ar_close();
285 	proc_dir();
286 	if (tflag)
287 		atdir_end();
288 	exit(1);
289 }
290 
291 /*
292  * gen_init()
293  *	general setup routines. Not all are required, but they really help
294  *	when dealing with a medium to large sized archives.
295  */
296 
297 #if __STDC__
298 static int
299 gen_init(void)
300 #else
301 static int
302 gen_init()
303 #endif
304 {
305 	struct rlimit reslimit;
306 	struct sigaction n_hand;
307 	struct sigaction o_hand;
308 
309 	/*
310 	 * Really needed to handle large archives. We can run out of memory for
311 	 * internal tables really fast when we have a whole lot of files...
312 	 */
313 	if (getrlimit(RLIMIT_DATA , &reslimit) == 0){
314 		reslimit.rlim_cur = reslimit.rlim_max;
315 		(void)setrlimit(RLIMIT_DATA , &reslimit);
316 	}
317 
318 	/*
319 	 * should file size limits be waived? if the os limits us, this is
320 	 * needed if we want to write a large archive
321 	 */
322 	if (getrlimit(RLIMIT_FSIZE , &reslimit) == 0){
323 		reslimit.rlim_cur = reslimit.rlim_max;
324 		(void)setrlimit(RLIMIT_FSIZE , &reslimit);
325 	}
326 
327 	/*
328 	 * increase the size the stack can grow to
329 	 */
330 	if (getrlimit(RLIMIT_STACK , &reslimit) == 0){
331 		reslimit.rlim_cur = reslimit.rlim_max;
332 		(void)setrlimit(RLIMIT_STACK , &reslimit);
333 	}
334 
335 	/*
336 	 * not really needed, but doesn't hurt
337 	 */
338 	if (getrlimit(RLIMIT_RSS , &reslimit) == 0){
339 		reslimit.rlim_cur = reslimit.rlim_max;
340 		(void)setrlimit(RLIMIT_RSS , &reslimit);
341 	}
342 
343 	/*
344 	 * Handle posix locale
345 	 *
346 	 * set user defines time printing format for -v option
347 	 */
348 	ltmfrmt = getenv("LC_TIME");
349 
350 	/*
351 	 * signal handling to reset stored directory times and modes. Since
352 	 * we deal with broken pipes via failed writes we ignore it. We also
353 	 * deal with exceeed file size limit with failed writes. Cpu time
354 	 * limits is caught and a cleanup is forced. All other "user"
355 	 * generated signals are handled.
356 	 */
357 	if ((sigemptyset(&s_mask) < 0) || (sigaddset(&s_mask, SIGTERM) < 0) ||
358 	    (sigaddset(&s_mask,SIGINT) < 0)||(sigaddset(&s_mask,SIGHUP) < 0) ||
359 	    (sigaddset(&s_mask,SIGPIPE) < 0)||(sigaddset(&s_mask,SIGQUIT)<0) ||
360 	    (sigaddset(&s_mask,SIGXCPU) < 0)||(sigaddset(&s_mask,SIGXFSZ)<0)) {
361 		warn(1, "Unable to set up signal mask");
362 		return(-1);
363 	}
364 	n_hand.sa_mask = s_mask;
365 	n_hand.sa_flags = 0;
366 	n_hand.sa_handler = sig_cleanup;
367 
368 	if ((sigaction(SIGHUP, &n_hand, &o_hand) < 0) &&
369 	    (o_hand.sa_handler == SIG_IGN) &&
370 	    (sigaction(SIGHUP, &o_hand, &o_hand) < 0))
371 		goto out;
372 
373 	if ((sigaction(SIGTERM, &n_hand, &o_hand) < 0) &&
374 	    (o_hand.sa_handler == SIG_IGN) &&
375 	    (sigaction(SIGTERM, &o_hand, &o_hand) < 0))
376 		goto out;
377 
378 	if ((sigaction(SIGINT, &n_hand, &o_hand) < 0) &&
379 	    (o_hand.sa_handler == SIG_IGN) &&
380 	    (sigaction(SIGINT, &o_hand, &o_hand) < 0))
381 		goto out;
382 
383 	if ((sigaction(SIGQUIT, &n_hand, &o_hand) < 0) &&
384 	    (o_hand.sa_handler == SIG_IGN) &&
385 	    (sigaction(SIGQUIT, &o_hand, &o_hand) < 0))
386 		goto out;
387 
388 	if ((sigaction(SIGXCPU, &n_hand, &o_hand) < 0) &&
389 	    (o_hand.sa_handler == SIG_IGN) &&
390 	    (sigaction(SIGXCPU, &o_hand, &o_hand) < 0))
391 		goto out;
392 
393 	n_hand.sa_handler = SIG_IGN;
394 	if ((sigaction(SIGPIPE, &n_hand, &o_hand) < 0) ||
395 	    (sigaction(SIGXFSZ, &n_hand, &o_hand) < 0))
396 		goto out;
397 	return(0);
398 
399     out:
400 	syswarn(1, errno, "Unable to set up signal handler");
401 	return(-1);
402 }
403