xref: /plan9/sys/src/ape/cmd/pax/extract.c (revision 9a747e4fd48b9f4522c70c07e8f882a15030f964)
1 /* $Source: /u/mark/src/pax/RCS/extract.c,v $
2  *
3  * $Revision: 1.3 $
4  *
5  * extract.c - Extract files from a tar archive.
6  *
7  * DESCRIPTION
8  *
9  * AUTHOR
10  *
11  *	Mark H. Colburn, NAPS International (mark@jhereg.mn.org)
12  *
13  * Sponsored by The USENIX Association for public distribution.
14  *
15  * Copyright (c) 1989 Mark H. Colburn.
16  * All rights reserved.
17  *
18  * Redistribution and use in source and binary forms are permitted
19  * provided that the above copyright notice is duplicated in all such
20  * forms and that any documentation, advertising materials, and other
21  * materials related to such distribution and use acknowledge that the
22  * software was developed by Mark H. Colburn and sponsored by The
23  * USENIX Association.
24  *
25  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
26  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
27  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
28  *
29  * $Log:	extract.c,v $
30  * Revision 1.3  89/02/12  10:29:43  mark
31  * Fixed misspelling of Replstr
32  *
33  * Revision 1.2  89/02/12  10:04:24  mark
34  * 1.2 release fixes
35  *
36  * Revision 1.1  88/12/23  18:02:07  mark
37  * Initial revision
38  *
39  */
40 
#ifndef lint
/*
 * RCS identification and copyright strings.  They are left out when
 * "lint" is defined.
 */
static char ident[] = "$Id: extract.c,v 1.3 89/02/12 10:29:43 mark Exp Locker: mark $";
static char copyright[] = "Copyright (c) 1989 Mark H. Colburn.\nAll rights reserved.\n";
#endif /* ! lint */
45 
46 
47 /* Headers */
48 
49 #include "pax.h"
50 
51 
52 /* Defines */
53 
/*
 * SWAB - exchange the two low-order bytes of a value.
 *
 * The argument is converted to ushort first and appears twice in the
 * expansion, so it must not have side effects.
 */
#define	SWAB(n)	((((ushort)(n) & 0xff00) >> 8) | (((ushort)(n) & 0x00ff) << 8))
58 
59 
60 /* Function Prototypes */
61 
62 #ifdef __STDC__
63 
64 static int inbinary(char *, char *, Stat *);
65 static int inascii(char *, char *, Stat *);
66 static int inswab(char *, char *, Stat *);
67 static int readtar(char *, Stat *);
68 static int readcpio(char *, Stat *);
69 
70 #else /* !__STDC__ */
71 
72 static int inbinary();
73 static int inascii();
74 static int inswab();
75 static int readtar();
76 static int readcpio();
77 
78 #endif /* __STDC__ */
79 
80 
81 /* read_archive - read in an archive
82  *
83  * DESCRIPTION
84  *
85  *	Read_archive is the central entry point for reading archives.
86  *	Read_archive determines the proper archive functions to call
87  *	based upon the archive type being processed.
88  *
89  * RETURNS
90  *
91  */
92 
93 #ifdef __STDC__
94 
read_archive(void)95 int read_archive(void)
96 
97 #else
98 
99 int read_archive()
100 
101 #endif
102 {
103     Stat            sb;
104     char            name[PATH_MAX + 1];
105     int             match;
106     int		    pad;
107 
108     name_gather();		/* get names from command line */
109     name[0] = '\0';
110     while (get_header(name, &sb) == 0) {
111 	match = name_match(name) ^ f_reverse_match;
112 	if (f_list) {		/* only wanted a table of contents */
113 	    if (match) {
114 		print_entry(name, &sb);
115 	    }
116 	    if (((ar_format == TAR)
117 		? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE))
118 		: buf_skip((OFFSET) sb.sb_size)) < 0) {
119 		warn(name, "File data is corrupt");
120 	    }
121 	} else if (match) {
122 	    if (rplhead != (Replstr *)NULL) {
123 		rpl_name(name);
124 		if (strlen(name) == 0) {
125 		    continue;
126 		}
127 	    }
128 	    if (get_disposition("extract", name) ||
129                 get_newname(name, sizeof(name))) {
130 		/* skip file... */
131 		if (((ar_format == TAR)
132 		    ? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE))
133 		    : buf_skip((OFFSET) sb.sb_size)) < 0) {
134 		    warn(name, "File data is corrupt");
135 		}
136 		continue;
137 	    }
138 	    if (inentry(name, &sb) < 0) {
139 		warn(name, "File data is corrupt");
140 	    }
141 	    if (f_verbose) {
142 		print_entry(name, &sb);
143 	    }
144 	    if (ar_format == TAR && sb.sb_nlink > 1) {
145 		/*
146 		 * This kludge makes sure that the link table is cleared
147 		 * before attempting to process any other links.
148 		 */
149 		if (sb.sb_nlink > 1) {
150 		    linkfrom(name, &sb);
151 		}
152 	    }
153 	    if (ar_format == TAR && (pad = sb.sb_size % BLOCKSIZE) != 0) {
154 		pad = BLOCKSIZE - pad;
155 		buf_skip((OFFSET) pad);
156 	    }
157 	} else {
158 	    if (((ar_format == TAR)
159 		? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE))
160 		: buf_skip((OFFSET) sb.sb_size)) < 0) {
161 		warn(name, "File data is corrupt");
162 	    }
163 	}
164     }
165 
166     close_archive();
167 }
168 
169 
170 
171 /* get_header - figures which type of header needs to be read.
172  *
173  * DESCRIPTION
174  *
175  *	This is merely a single entry point for the two types of archive
176  *	headers which are supported.  The correct header is selected
177  *	depending on the archive type.
178  *
179  * PARAMETERS
180  *
181  *	char	*name	- name of the file (passed to header routine)
182  *	Stat	*asb	- Stat block for the file (passed to header routine)
183  *
184  * RETURNS
185  *
186  *	Returns the value which was returned by the proper header
187  *	function.
188  */
189 
190 #ifdef __STDC__
191 
get_header(char * name,Stat * asb)192 int get_header(char *name, Stat *asb)
193 
194 #else
195 
196 int get_header(name, asb)
197 char *name;
198 Stat *asb;
199 
200 #endif
201 {
202     if (ar_format == TAR) {
203 	return(readtar(name, asb));
204     } else {
205 	return(readcpio(name, asb));
206     }
207 }
208 
209 
210 /* readtar - read a tar header
211  *
212  * DESCRIPTION
213  *
214  *	Tar_head read a tar format header from the archive.  The name
215  *	and asb parameters are modified as appropriate for the file listed
216  *	in the header.   Name is assumed to be a pointer to an array of
217  *	at least PATH_MAX bytes.
218  *
219  * PARAMETERS
220  *
221  *	char	*name 	- name of the file for which the header is
222  *			  for.  This is modified and passed back to
223  *			  the caller.
224  *	Stat	*asb	- Stat block for the file for which the header
225  *			  is for.  The fields of the stat structure are
226  *			  extracted from the archive header.  This is
227  *			  also passed back to the caller.
228  *
229  * RETURNS
230  *
231  *	Returns 0 if a valid header was found, or -1 if EOF is
232  *	encountered.
233  */
234 
235 #ifdef __STDC__
236 
readtar(char * name,Stat * asb)237 static int readtar(char *name, Stat *asb)
238 
239 #else
240 
241 static int readtar(name, asb)
242 char	*name;
243 Stat    *asb;
244 
245 #endif
246 {
247     int             status = 3;	/* Initial status at start of archive */
248     static int      prev_status;
249 
250     for (;;) {
251 	prev_status = status;
252 	status = read_header(name, asb);
253 	switch (status) {
254 	case 1:		/* Valid header */
255 		return(0);
256 	case 0:		/* Invalid header */
257 	    switch (prev_status) {
258 	    case 3:		/* Error on first record */
259 		warn(ar_file, "This doesn't look like a tar archive");
260 		/* FALLTHRU */
261 	    case 2:		/* Error after record of zeroes */
262 	    case 1:		/* Error after header rec */
263 		warn(ar_file, "Skipping to next file...");
264 		/* FALLTHRU */
265 	    default:
266 	    case 0:		/* Error after error */
267 		break;
268 	    }
269 	    break;
270 
271 	case 2:			/* Record of zeroes */
272 	case EOF:		/* End of archive */
273 	default:
274 	    return(-1);
275 	}
276     }
277 }
278 
279 
280 /* readcpio - read a CPIO header
281  *
282  * DESCRIPTION
283  *
284  *	Read in a cpio header.  Understands how to determine and read ASCII,
285  *	binary and byte-swapped binary headers.  Quietly translates
286  *	old-fashioned binary cpio headers (and arranges to skip the possible
287  *	alignment byte). Returns zero if successful, -1 upon archive trailer.
288  *
289  * PARAMETERS
290  *
291  *	char	*name 	- name of the file for which the header is
292  *			  for.  This is modified and passed back to
293  *			  the caller.
294  *	Stat	*asb	- Stat block for the file for which the header
295  *			  is for.  The fields of the stat structure are
296  *			  extracted from the archive header.  This is
297  *			  also passed back to the caller.
298  *
299  * RETURNS
300  *
301  *	Returns 0 if a valid header was found, or -1 if EOF is
302  *	encountered.
303  */
304 
305 #ifdef __STDC__
306 
readcpio(char * name,Stat * asb)307 static int readcpio(char *name, Stat *asb)
308 
309 #else
310 
311 static int readcpio(name, asb)
312 char           *name;
313 Stat           *asb;
314 
315 #endif
316 {
317     OFFSET          skipped;
318     char            magic[M_STRLEN];
319     static int      align;
320 
321     if (align > 0) {
322 	buf_skip((OFFSET) align);
323     }
324     align = 0;
325     for (;;) {
326 	buf_read(magic, M_STRLEN);
327 	skipped = 0;
328 	while ((align = inascii(magic, name, asb)) < 0
329 	       && (align = inbinary(magic, name, asb)) < 0
330 	       && (align = inswab(magic, name, asb)) < 0) {
331 	    if (++skipped == 1) {
332 		if (total - sizeof(magic) == 0) {
333 		    fatal("Unrecognizable archive");
334 		}
335 		warnarch("Bad magic number", (OFFSET) sizeof(magic));
336 		if (name[0]) {
337 		    warn(name, "May be corrupt");
338 		}
339 	    }
340 	    memcpy(magic, magic + 1, sizeof(magic) - 1);
341 	    buf_read(magic + sizeof(magic) - 1, 1);
342 	}
343 	if (skipped) {
344 	    warnarch("Apparently resynchronized", (OFFSET) sizeof(magic));
345 	    warn(name, "Continuing");
346 	}
347 	if (strcmp(name, TRAILER) == 0) {
348 	    return (-1);
349 	}
350 	if (nameopt(name) >= 0) {
351 	    break;
352 	}
353 	buf_skip((OFFSET) asb->sb_size + align);
354     }
355 #ifdef	S_IFLNK
356     if ((asb->sb_mode & S_IFMT) == S_IFLNK) {
357 	if (buf_read(asb->sb_link, (uint) asb->sb_size) < 0) {
358 	    warn(name, "Corrupt symbolic link");
359 	    return (readcpio(name, asb));
360 	}
361 	asb->sb_link[asb->sb_size] = '\0';
362 	asb->sb_size = 0;
363     }
364 #endif				/* S_IFLNK */
365 
366     /* destroy absolute pathnames for security reasons */
367     if (name[0] == '/') {
368 	if (name[1]) {
369 	    while (name[0] = name[1]) {
370 		++name;
371 	    }
372 	} else {
373 	    name[0] = '.';
374 	}
375     }
376     asb->sb_atime = asb->sb_ctime = asb->sb_mtime;
377     if (asb->sb_nlink > 1) {
378 	linkto(name, asb);
379     }
380     return (0);
381 }
382 
383 
384 /* inswab - read a reversed by order binary header
385  *
386  * DESCRIPTIONS
387  *
388  *	Reads a byte-swapped CPIO binary archive header
389  *
390  * PARMAMETERS
391  *
392  *	char	*magic	- magic number to match
393  *	char	*name	- name of the file which is stored in the header.
394  *			  (modified and passed back to caller).
395  *	Stat	*asb	- stat block for the file (modified and passed back
396  *			  to the caller).
397  *
398  *
399  * RETURNS
400  *
401  * 	Returns the number of trailing alignment bytes to skip; -1 if
402  *	unsuccessful.
403  *
404  */
405 
406 #ifdef __STDC__
407 
inswab(char * magic,char * name,Stat * asb)408 static int inswab(char *magic, char *name, Stat *asb)
409 
410 #else
411 
412 static int inswab(magic, name, asb)
413 char           *magic;
414 char           *name;
415 Stat           *asb;
416 
417 #endif
418 {
419     ushort          namesize;
420     uint            namefull;
421     Binary          binary;
422 
423     if (*((ushort *) magic) != SWAB(M_BINARY)) {
424 	return (-1);
425     }
426     memcpy((char *) &binary,
427 		  magic + sizeof(ushort),
428 		  M_STRLEN - sizeof(ushort));
429     if (buf_read((char *) &binary + M_STRLEN - sizeof(ushort),
430 		 sizeof(binary) - (M_STRLEN - sizeof(ushort))) < 0) {
431 	warnarch("Corrupt swapped header",
432 		 (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
433 	return (-1);
434     }
435     asb->sb_dev = (dev_t) SWAB(binary.b_dev);
436     asb->sb_ino = (ino_t) SWAB(binary.b_ino);
437     asb->sb_mode = SWAB(binary.b_mode);
438     asb->sb_uid = SWAB(binary.b_uid);
439     asb->sb_gid = SWAB(binary.b_gid);
440     asb->sb_nlink = SWAB(binary.b_nlink);
441 #ifndef _POSIX_SOURCE
442     asb->sb_rdev = (dev_t) SWAB(binary.b_rdev);
443 #endif
444     asb->sb_mtime = SWAB(binary.b_mtime[0]) << 16 | SWAB(binary.b_mtime[1]);
445     asb->sb_size = SWAB(binary.b_size[0]) << 16 | SWAB(binary.b_size[1]);
446     if ((namesize = SWAB(binary.b_name)) == 0 || namesize >= PATH_MAX) {
447 	warnarch("Bad swapped pathname length",
448 		 (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
449 	return (-1);
450     }
451     if (buf_read(name, namefull = namesize + namesize % 2) < 0) {
452 	warnarch("Corrupt swapped pathname", (OFFSET) namefull);
453 	return (-1);
454     }
455     if (name[namesize - 1] != '\0') {
456 	warnarch("Bad swapped pathname", (OFFSET) namefull);
457 	return (-1);
458     }
459     return (asb->sb_size % 2);
460 }
461 
462 
463 /* inascii - read in an ASCII cpio header
464  *
465  * DESCRIPTION
466  *
467  *	Reads an ASCII format cpio header
468  *
469  * PARAMETERS
470  *
471  *	char	*magic	- magic number to match
472  *	char	*name	- name of the file which is stored in the header.
473  *			  (modified and passed back to caller).
474  *	Stat	*asb	- stat block for the file (modified and passed back
475  *			  to the caller).
476  *
477  * RETURNS
478  *
479  * 	Returns zero if successful; -1 otherwise. Assumes that  the entire
480  *	magic number has been read.
481  */
482 
483 #ifdef __STDC__
484 
inascii(char * magic,char * name,Stat * asb)485 static int inascii(char *magic, char *name, Stat *asb)
486 
487 #else
488 
489 static int inascii(magic, name, asb)
490 char           *magic;
491 char           *name;
492 Stat           *asb;
493 
494 #endif
495 {
496     uint            namelen;
497     char            header[H_STRLEN + 1];
498 #ifdef _POSIX_SOURCE
499     dev_t	    dummyrdev;
500 #endif
501 
502     if (strncmp(magic, M_ASCII, M_STRLEN) != 0) {
503 	return (-1);
504     }
505     if (buf_read(header, H_STRLEN) < 0) {
506 	warnarch("Corrupt ASCII header", (OFFSET) H_STRLEN);
507 	return (-1);
508     }
509     header[H_STRLEN] = '\0';
510     if (sscanf(header, H_SCAN, &asb->sb_dev,
511 	       &asb->sb_ino, &asb->sb_mode, &asb->sb_uid,
512 #ifdef _POSIX_SOURCE
513 	       &asb->sb_gid, &asb->sb_nlink, &dummyrdev,
514 #else
515 	       &asb->sb_gid, &asb->sb_nlink, &asb->sb_rdev,
516 #endif
517 	       &asb->sb_mtime, &namelen, &asb->sb_size) != H_COUNT) {
518 	warnarch("Bad ASCII header", (OFFSET) H_STRLEN);
519 	return (-1);
520     }
521     if (namelen == 0 || namelen >= PATH_MAX) {
522 	warnarch("Bad ASCII pathname length", (OFFSET) H_STRLEN);
523 	return (-1);
524     }
525     if (buf_read(name, namelen) < 0) {
526 	warnarch("Corrupt ASCII pathname", (OFFSET) namelen);
527 	return (-1);
528     }
529     if (name[namelen - 1] != '\0') {
530 	warnarch("Bad ASCII pathname", (OFFSET) namelen);
531 	return (-1);
532     }
533     return (0);
534 }
535 
536 
537 /* inbinary - read a binary header
538  *
539  * DESCRIPTION
540  *
541  *	Reads a CPIO format binary header.
542  *
543  * PARAMETERS
544  *
545  *	char	*magic	- magic number to match
546  *	char	*name	- name of the file which is stored in the header.
547  *			  (modified and passed back to caller).
548  *	Stat	*asb	- stat block for the file (modified and passed back
549  *			  to the caller).
550  *
551  * RETURNS
552  *
553  * 	Returns the number of trailing alignment bytes to skip; -1 if
554  *	unsuccessful.
555  */
556 
557 #ifdef __STDC__
558 
inbinary(char * magic,char * name,Stat * asb)559 static int inbinary(char *magic, char *name, Stat *asb)
560 
561 #else
562 
563 static int inbinary(magic, name, asb)
564 char           *magic;
565 char           *name;
566 Stat           *asb;
567 
568 #endif
569 {
570     uint            namefull;
571     Binary          binary;
572 
573     if (*((ushort *) magic) != M_BINARY) {
574 	return (-1);
575     }
576     memcpy((char *) &binary,
577 		  magic + sizeof(ushort),
578 		  M_STRLEN - sizeof(ushort));
579     if (buf_read((char *) &binary + M_STRLEN - sizeof(ushort),
580 		 sizeof(binary) - (M_STRLEN - sizeof(ushort))) < 0) {
581 	warnarch("Corrupt binary header",
582 		 (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
583 	return (-1);
584     }
585     asb->sb_dev = binary.b_dev;
586     asb->sb_ino = binary.b_ino;
587     asb->sb_mode = binary.b_mode;
588     asb->sb_uid = binary.b_uid;
589     asb->sb_gid = binary.b_gid;
590     asb->sb_nlink = binary.b_nlink;
591 #ifndef _POSIX_SOURCE
592     asb->sb_rdev = binary.b_rdev;
593 #endif
594     asb->sb_mtime = binary.b_mtime[0] << 16 | binary.b_mtime[1];
595     asb->sb_size = binary.b_size[0] << 16 | binary.b_size[1];
596     if (binary.b_name == 0 || binary.b_name >= PATH_MAX) {
597 	warnarch("Bad binary pathname length",
598 		 (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
599 	return (-1);
600     }
601     if (buf_read(name, namefull = binary.b_name + binary.b_name % 2) < 0) {
602 	warnarch("Corrupt binary pathname", (OFFSET) namefull);
603 	return (-1);
604     }
605     if (name[binary.b_name - 1] != '\0') {
606 	warnarch("Bad binary pathname", (OFFSET) namefull);
607 	return (-1);
608     }
609     return (asb->sb_size % 2);
610 }
611