1 /* $Source: /u/mark/src/pax/RCS/extract.c,v $
2 *
3 * $Revision: 1.3 $
4 *
5 * extract.c - Extract files from a tar or cpio archive.
6 *
7 * DESCRIPTION
8 *
9 * AUTHOR
10 *
11 * Mark H. Colburn, NAPS International (mark@jhereg.mn.org)
12 *
13 * Sponsored by The USENIX Association for public distribution.
14 *
15 * Copyright (c) 1989 Mark H. Colburn.
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms are permitted
19 * provided that the above copyright notice is duplicated in all such
20 * forms and that any documentation, advertising materials, and other
21 * materials related to such distribution and use acknowledge that the
22 * software was developed * by Mark H. Colburn and sponsored by The
23 * USENIX Association.
24 *
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
26 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
27 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
28 *
29 * $Log: extract.c,v $
30 * Revision 1.3 89/02/12 10:29:43 mark
31 * Fixed misspelling of Replstr
32 *
33 * Revision 1.2 89/02/12 10:04:24 mark
34 * 1.2 release fixes
35 *
36 * Revision 1.1 88/12/23 18:02:07 mark
37 * Initial revision
38 *
39 */
40
#ifndef lint
/* RCS identification and copyright strings; left out when lint is defined. */
static char *ident =
    "$Id: extract.c,v 1.3 89/02/12 10:29:43 mark Exp Locker: mark $";
static char *copyright =
    "Copyright (c) 1989 Mark H. Colburn.\nAll rights reserved.\n";
#endif /* ! lint */
45
46
47 /* Headers */
48
49 #include "pax.h"
50
51
52 /* Defines */
53
/*
 * SWAB - exchange the two low-order bytes of n.
 *
 * The argument is converted to ushort first, so anything above the low
 * two bytes is discarded.  n is evaluated twice; do not pass an
 * expression with side effects.
 */
#define SWAB(n) \
    ((((ushort)(n) << 8) & 0xff00) | (((ushort)(n) >> 8) & 0xff))
58
59
60 /* Function Prototypes */
61
62 #ifdef __STDC__
63
64 static int inbinary(char *, char *, Stat *);
65 static int inascii(char *, char *, Stat *);
66 static int inswab(char *, char *, Stat *);
67 static int readtar(char *, Stat *);
68 static int readcpio(char *, Stat *);
69
70 #else /* !__STDC__ */
71
72 static int inbinary();
73 static int inascii();
74 static int inswab();
75 static int readtar();
76 static int readcpio();
77
78 #endif /* __STDC__ */
79
80
81 /* read_archive - read in an archive
82 *
83 * DESCRIPTION
84 *
85 * Read_archive is the central entry point for reading archives.
86 * Read_archive determines the proper archive functions to call
87 * based upon the archive type being processed.
88 *
89 * RETURNS
90 *
91 */
92
93 #ifdef __STDC__
94
read_archive(void)95 int read_archive(void)
96
97 #else
98
99 int read_archive()
100
101 #endif
102 {
103 Stat sb;
104 char name[PATH_MAX + 1];
105 int match;
106 int pad;
107
108 name_gather(); /* get names from command line */
109 name[0] = '\0';
110 while (get_header(name, &sb) == 0) {
111 match = name_match(name) ^ f_reverse_match;
112 if (f_list) { /* only wanted a table of contents */
113 if (match) {
114 print_entry(name, &sb);
115 }
116 if (((ar_format == TAR)
117 ? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE))
118 : buf_skip((OFFSET) sb.sb_size)) < 0) {
119 warn(name, "File data is corrupt");
120 }
121 } else if (match) {
122 if (rplhead != (Replstr *)NULL) {
123 rpl_name(name);
124 if (strlen(name) == 0) {
125 continue;
126 }
127 }
128 if (get_disposition("extract", name) ||
129 get_newname(name, sizeof(name))) {
130 /* skip file... */
131 if (((ar_format == TAR)
132 ? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE))
133 : buf_skip((OFFSET) sb.sb_size)) < 0) {
134 warn(name, "File data is corrupt");
135 }
136 continue;
137 }
138 if (inentry(name, &sb) < 0) {
139 warn(name, "File data is corrupt");
140 }
141 if (f_verbose) {
142 print_entry(name, &sb);
143 }
144 if (ar_format == TAR && sb.sb_nlink > 1) {
145 /*
146 * This kludge makes sure that the link table is cleared
147 * before attempting to process any other links.
148 */
149 if (sb.sb_nlink > 1) {
150 linkfrom(name, &sb);
151 }
152 }
153 if (ar_format == TAR && (pad = sb.sb_size % BLOCKSIZE) != 0) {
154 pad = BLOCKSIZE - pad;
155 buf_skip((OFFSET) pad);
156 }
157 } else {
158 if (((ar_format == TAR)
159 ? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE))
160 : buf_skip((OFFSET) sb.sb_size)) < 0) {
161 warn(name, "File data is corrupt");
162 }
163 }
164 }
165
166 close_archive();
167 }
168
169
170
171 /* get_header - figures which type of header needs to be read.
172 *
173 * DESCRIPTION
174 *
175 * This is merely a single entry point for the two types of archive
176 * headers which are supported. The correct header is selected
177 * depending on the archive type.
178 *
179 * PARAMETERS
180 *
181 * char *name - name of the file (passed to header routine)
182 * Stat *asb - Stat block for the file (passed to header routine)
183 *
184 * RETURNS
185 *
186 * Returns the value which was returned by the proper header
187 * function.
188 */
189
190 #ifdef __STDC__
191
get_header(char * name,Stat * asb)192 int get_header(char *name, Stat *asb)
193
194 #else
195
196 int get_header(name, asb)
197 char *name;
198 Stat *asb;
199
200 #endif
201 {
202 if (ar_format == TAR) {
203 return(readtar(name, asb));
204 } else {
205 return(readcpio(name, asb));
206 }
207 }
208
209
210 /* readtar - read a tar header
211 *
212 * DESCRIPTION
213 *
214 * Tar_head read a tar format header from the archive. The name
215 * and asb parameters are modified as appropriate for the file listed
216 * in the header. Name is assumed to be a pointer to an array of
217 * at least PATH_MAX bytes.
218 *
219 * PARAMETERS
220 *
221 * char *name - name of the file for which the header is
222 * for. This is modified and passed back to
223 * the caller.
224 * Stat *asb - Stat block for the file for which the header
225 * is for. The fields of the stat structure are
226 * extracted from the archive header. This is
227 * also passed back to the caller.
228 *
229 * RETURNS
230 *
231 * Returns 0 if a valid header was found, or -1 if EOF is
232 * encountered.
233 */
234
235 #ifdef __STDC__
236
readtar(char * name,Stat * asb)237 static int readtar(char *name, Stat *asb)
238
239 #else
240
241 static int readtar(name, asb)
242 char *name;
243 Stat *asb;
244
245 #endif
246 {
247 int status = 3; /* Initial status at start of archive */
248 static int prev_status;
249
250 for (;;) {
251 prev_status = status;
252 status = read_header(name, asb);
253 switch (status) {
254 case 1: /* Valid header */
255 return(0);
256 case 0: /* Invalid header */
257 switch (prev_status) {
258 case 3: /* Error on first record */
259 warn(ar_file, "This doesn't look like a tar archive");
260 /* FALLTHRU */
261 case 2: /* Error after record of zeroes */
262 case 1: /* Error after header rec */
263 warn(ar_file, "Skipping to next file...");
264 /* FALLTHRU */
265 default:
266 case 0: /* Error after error */
267 break;
268 }
269 break;
270
271 case 2: /* Record of zeroes */
272 case EOF: /* End of archive */
273 default:
274 return(-1);
275 }
276 }
277 }
278
279
280 /* readcpio - read a CPIO header
281 *
282 * DESCRIPTION
283 *
284 * Read in a cpio header. Understands how to determine and read ASCII,
285 * binary and byte-swapped binary headers. Quietly translates
286 * old-fashioned binary cpio headers (and arranges to skip the possible
287 * alignment byte). Returns zero if successful, -1 upon archive trailer.
288 *
289 * PARAMETERS
290 *
291 * char *name - name of the file for which the header is
292 * for. This is modified and passed back to
293 * the caller.
294 * Stat *asb - Stat block for the file for which the header
295 * is for. The fields of the stat structure are
296 * extracted from the archive header. This is
297 * also passed back to the caller.
298 *
299 * RETURNS
300 *
301 * Returns 0 if a valid header was found, or -1 if EOF is
302 * encountered.
303 */
304
305 #ifdef __STDC__
306
readcpio(char * name,Stat * asb)307 static int readcpio(char *name, Stat *asb)
308
309 #else
310
311 static int readcpio(name, asb)
312 char *name;
313 Stat *asb;
314
315 #endif
316 {
317 OFFSET skipped;
318 char magic[M_STRLEN];
319 static int align;
320
321 if (align > 0) {
322 buf_skip((OFFSET) align);
323 }
324 align = 0;
325 for (;;) {
326 buf_read(magic, M_STRLEN);
327 skipped = 0;
328 while ((align = inascii(magic, name, asb)) < 0
329 && (align = inbinary(magic, name, asb)) < 0
330 && (align = inswab(magic, name, asb)) < 0) {
331 if (++skipped == 1) {
332 if (total - sizeof(magic) == 0) {
333 fatal("Unrecognizable archive");
334 }
335 warnarch("Bad magic number", (OFFSET) sizeof(magic));
336 if (name[0]) {
337 warn(name, "May be corrupt");
338 }
339 }
340 memcpy(magic, magic + 1, sizeof(magic) - 1);
341 buf_read(magic + sizeof(magic) - 1, 1);
342 }
343 if (skipped) {
344 warnarch("Apparently resynchronized", (OFFSET) sizeof(magic));
345 warn(name, "Continuing");
346 }
347 if (strcmp(name, TRAILER) == 0) {
348 return (-1);
349 }
350 if (nameopt(name) >= 0) {
351 break;
352 }
353 buf_skip((OFFSET) asb->sb_size + align);
354 }
355 #ifdef S_IFLNK
356 if ((asb->sb_mode & S_IFMT) == S_IFLNK) {
357 if (buf_read(asb->sb_link, (uint) asb->sb_size) < 0) {
358 warn(name, "Corrupt symbolic link");
359 return (readcpio(name, asb));
360 }
361 asb->sb_link[asb->sb_size] = '\0';
362 asb->sb_size = 0;
363 }
364 #endif /* S_IFLNK */
365
366 /* destroy absolute pathnames for security reasons */
367 if (name[0] == '/') {
368 if (name[1]) {
369 while (name[0] = name[1]) {
370 ++name;
371 }
372 } else {
373 name[0] = '.';
374 }
375 }
376 asb->sb_atime = asb->sb_ctime = asb->sb_mtime;
377 if (asb->sb_nlink > 1) {
378 linkto(name, asb);
379 }
380 return (0);
381 }
382
383
384 /* inswab - read a reversed by order binary header
385 *
386 * DESCRIPTIONS
387 *
388 * Reads a byte-swapped CPIO binary archive header
389 *
390 * PARMAMETERS
391 *
392 * char *magic - magic number to match
393 * char *name - name of the file which is stored in the header.
394 * (modified and passed back to caller).
395 * Stat *asb - stat block for the file (modified and passed back
396 * to the caller).
397 *
398 *
399 * RETURNS
400 *
401 * Returns the number of trailing alignment bytes to skip; -1 if
402 * unsuccessful.
403 *
404 */
405
406 #ifdef __STDC__
407
inswab(char * magic,char * name,Stat * asb)408 static int inswab(char *magic, char *name, Stat *asb)
409
410 #else
411
412 static int inswab(magic, name, asb)
413 char *magic;
414 char *name;
415 Stat *asb;
416
417 #endif
418 {
419 ushort namesize;
420 uint namefull;
421 Binary binary;
422
423 if (*((ushort *) magic) != SWAB(M_BINARY)) {
424 return (-1);
425 }
426 memcpy((char *) &binary,
427 magic + sizeof(ushort),
428 M_STRLEN - sizeof(ushort));
429 if (buf_read((char *) &binary + M_STRLEN - sizeof(ushort),
430 sizeof(binary) - (M_STRLEN - sizeof(ushort))) < 0) {
431 warnarch("Corrupt swapped header",
432 (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
433 return (-1);
434 }
435 asb->sb_dev = (dev_t) SWAB(binary.b_dev);
436 asb->sb_ino = (ino_t) SWAB(binary.b_ino);
437 asb->sb_mode = SWAB(binary.b_mode);
438 asb->sb_uid = SWAB(binary.b_uid);
439 asb->sb_gid = SWAB(binary.b_gid);
440 asb->sb_nlink = SWAB(binary.b_nlink);
441 #ifndef _POSIX_SOURCE
442 asb->sb_rdev = (dev_t) SWAB(binary.b_rdev);
443 #endif
444 asb->sb_mtime = SWAB(binary.b_mtime[0]) << 16 | SWAB(binary.b_mtime[1]);
445 asb->sb_size = SWAB(binary.b_size[0]) << 16 | SWAB(binary.b_size[1]);
446 if ((namesize = SWAB(binary.b_name)) == 0 || namesize >= PATH_MAX) {
447 warnarch("Bad swapped pathname length",
448 (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
449 return (-1);
450 }
451 if (buf_read(name, namefull = namesize + namesize % 2) < 0) {
452 warnarch("Corrupt swapped pathname", (OFFSET) namefull);
453 return (-1);
454 }
455 if (name[namesize - 1] != '\0') {
456 warnarch("Bad swapped pathname", (OFFSET) namefull);
457 return (-1);
458 }
459 return (asb->sb_size % 2);
460 }
461
462
463 /* inascii - read in an ASCII cpio header
464 *
465 * DESCRIPTION
466 *
467 * Reads an ASCII format cpio header
468 *
469 * PARAMETERS
470 *
471 * char *magic - magic number to match
472 * char *name - name of the file which is stored in the header.
473 * (modified and passed back to caller).
474 * Stat *asb - stat block for the file (modified and passed back
475 * to the caller).
476 *
477 * RETURNS
478 *
479 * Returns zero if successful; -1 otherwise. Assumes that the entire
480 * magic number has been read.
481 */
482
483 #ifdef __STDC__
484
inascii(char * magic,char * name,Stat * asb)485 static int inascii(char *magic, char *name, Stat *asb)
486
487 #else
488
489 static int inascii(magic, name, asb)
490 char *magic;
491 char *name;
492 Stat *asb;
493
494 #endif
495 {
496 uint namelen;
497 char header[H_STRLEN + 1];
498 #ifdef _POSIX_SOURCE
499 dev_t dummyrdev;
500 #endif
501
502 if (strncmp(magic, M_ASCII, M_STRLEN) != 0) {
503 return (-1);
504 }
505 if (buf_read(header, H_STRLEN) < 0) {
506 warnarch("Corrupt ASCII header", (OFFSET) H_STRLEN);
507 return (-1);
508 }
509 header[H_STRLEN] = '\0';
510 if (sscanf(header, H_SCAN, &asb->sb_dev,
511 &asb->sb_ino, &asb->sb_mode, &asb->sb_uid,
512 #ifdef _POSIX_SOURCE
513 &asb->sb_gid, &asb->sb_nlink, &dummyrdev,
514 #else
515 &asb->sb_gid, &asb->sb_nlink, &asb->sb_rdev,
516 #endif
517 &asb->sb_mtime, &namelen, &asb->sb_size) != H_COUNT) {
518 warnarch("Bad ASCII header", (OFFSET) H_STRLEN);
519 return (-1);
520 }
521 if (namelen == 0 || namelen >= PATH_MAX) {
522 warnarch("Bad ASCII pathname length", (OFFSET) H_STRLEN);
523 return (-1);
524 }
525 if (buf_read(name, namelen) < 0) {
526 warnarch("Corrupt ASCII pathname", (OFFSET) namelen);
527 return (-1);
528 }
529 if (name[namelen - 1] != '\0') {
530 warnarch("Bad ASCII pathname", (OFFSET) namelen);
531 return (-1);
532 }
533 return (0);
534 }
535
536
537 /* inbinary - read a binary header
538 *
539 * DESCRIPTION
540 *
541 * Reads a CPIO format binary header.
542 *
543 * PARAMETERS
544 *
545 * char *magic - magic number to match
546 * char *name - name of the file which is stored in the header.
547 * (modified and passed back to caller).
548 * Stat *asb - stat block for the file (modified and passed back
549 * to the caller).
550 *
551 * RETURNS
552 *
553 * Returns the number of trailing alignment bytes to skip; -1 if
554 * unsuccessful.
555 */
556
557 #ifdef __STDC__
558
inbinary(char * magic,char * name,Stat * asb)559 static int inbinary(char *magic, char *name, Stat *asb)
560
561 #else
562
563 static int inbinary(magic, name, asb)
564 char *magic;
565 char *name;
566 Stat *asb;
567
568 #endif
569 {
570 uint namefull;
571 Binary binary;
572
573 if (*((ushort *) magic) != M_BINARY) {
574 return (-1);
575 }
576 memcpy((char *) &binary,
577 magic + sizeof(ushort),
578 M_STRLEN - sizeof(ushort));
579 if (buf_read((char *) &binary + M_STRLEN - sizeof(ushort),
580 sizeof(binary) - (M_STRLEN - sizeof(ushort))) < 0) {
581 warnarch("Corrupt binary header",
582 (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
583 return (-1);
584 }
585 asb->sb_dev = binary.b_dev;
586 asb->sb_ino = binary.b_ino;
587 asb->sb_mode = binary.b_mode;
588 asb->sb_uid = binary.b_uid;
589 asb->sb_gid = binary.b_gid;
590 asb->sb_nlink = binary.b_nlink;
591 #ifndef _POSIX_SOURCE
592 asb->sb_rdev = binary.b_rdev;
593 #endif
594 asb->sb_mtime = binary.b_mtime[0] << 16 | binary.b_mtime[1];
595 asb->sb_size = binary.b_size[0] << 16 | binary.b_size[1];
596 if (binary.b_name == 0 || binary.b_name >= PATH_MAX) {
597 warnarch("Bad binary pathname length",
598 (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
599 return (-1);
600 }
601 if (buf_read(name, namefull = binary.b_name + binary.b_name % 2) < 0) {
602 warnarch("Corrupt binary pathname", (OFFSET) namefull);
603 return (-1);
604 }
605 if (name[binary.b_name - 1] != '\0') {
606 warnarch("Bad binary pathname", (OFFSET) namefull);
607 return (-1);
608 }
609 return (asb->sb_size % 2);
610 }
611