1 /* $Source: /u/mark/src/pax/RCS/extract.c,v $ 2 * 3 * $Revision: 1.3 $ 4 * 5 * extract.c - Extract files from a tar archive. 6 * 7 * DESCRIPTION 8 * 9 * AUTHOR 10 * 11 * Mark H. Colburn, NAPS International (mark@jhereg.mn.org) 12 * 13 * Sponsored by The USENIX Association for public distribution. 14 * 15 * Copyright (c) 1989 Mark H. Colburn. 16 * All rights reserved. 17 * 18 * Redistribution and use in source and binary forms are permitted 19 * provided that the above copyright notice is duplicated in all such 20 * forms and that any documentation, advertising materials, and other 21 * materials related to such distribution and use acknowledge that the 22 * software was developed * by Mark H. Colburn and sponsored by The 23 * USENIX Association. 24 * 25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 26 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 27 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 28 * 29 * $Log: extract.c,v $ 30 * Revision 1.3 89/02/12 10:29:43 mark 31 * Fixed misspelling of Replstr 32 * 33 * Revision 1.2 89/02/12 10:04:24 mark 34 * 1.2 release fixes 35 * 36 * Revision 1.1 88/12/23 18:02:07 mark 37 * Initial revision 38 * 39 */ 40 41 #ifndef lint 42 static char *ident = "$Id: extract.c,v 1.3 89/02/12 10:29:43 mark Exp Locker: mark $"; 43 static char *copyright = "Copyright (c) 1989 Mark H. Colburn.\nAll rights reserved.\n"; 44 #endif /* ! lint */ 45 46 47 /* Headers */ 48 49 #include "pax.h" 50 51 52 /* Defines */ 53 54 /* 55 * Swap bytes. 56 */ 57 #define SWAB(n) ((((ushort)(n) >> 8) & 0xff) | (((ushort)(n) << 8) & 0xff00)) 58 59 60 /* Function Prototypes */ 61 62 #ifdef __STDC__ 63 64 static int inbinary(char *, char *, Stat *); 65 static int inascii(char *, char *, Stat *); 66 static int inswab(char *, char *, Stat *); 67 static int readtar(char *, Stat *); 68 static int readcpio(char *, Stat *); 69 70 #else /* !__STDC__ */ 71 72 static int inbinary(); 73 static int inascii(); 74 static int inswab(); 75 static int readtar(); 76 static int readcpio(); 77 78 #endif /* __STDC__ */ 79 80 81 /* read_archive - read in an archive 82 * 83 * DESCRIPTION 84 * 85 * Read_archive is the central entry point for reading archives. 86 * Read_archive determines the proper archive functions to call 87 * based upon the archive type being processed. 88 * 89 * RETURNS 90 * 91 */ 92 93 #ifdef __STDC__ 94 95 int read_archive(void) 96 97 #else 98 99 int read_archive() 100 101 #endif 102 { 103 Stat sb; 104 char name[PATH_MAX + 1]; 105 int match; 106 int pad; 107 108 name_gather(); /* get names from command line */ 109 name[0] = '\0'; 110 while (get_header(name, &sb) == 0) { 111 match = name_match(name) ^ f_reverse_match; 112 if (f_list) { /* only wanted a table of contents */ 113 if (match) { 114 print_entry(name, &sb); 115 } 116 if (((ar_format == TAR) 117 ? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE)) 118 : buf_skip((OFFSET) sb.sb_size)) < 0) { 119 warn(name, "File data is corrupt"); 120 } 121 } else if (match) { 122 if (rplhead != (Replstr *)NULL) { 123 rpl_name(name); 124 if (strlen(name) == 0) { 125 continue; 126 } 127 } 128 if (get_disposition("extract", name) || 129 get_newname(name, sizeof(name))) { 130 /* skip file... */ 131 if (((ar_format == TAR) 132 ? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE)) 133 : buf_skip((OFFSET) sb.sb_size)) < 0) { 134 warn(name, "File data is corrupt"); 135 } 136 continue; 137 } 138 if (inentry(name, &sb) < 0) { 139 warn(name, "File data is corrupt"); 140 } 141 if (f_verbose) { 142 print_entry(name, &sb); 143 } 144 if (ar_format == TAR && sb.sb_nlink > 1) { 145 /* 146 * This kludge makes sure that the link table is cleared 147 * before attempting to process any other links. 148 */ 149 if (sb.sb_nlink > 1) { 150 linkfrom(name, &sb); 151 } 152 } 153 if (ar_format == TAR && (pad = sb.sb_size % BLOCKSIZE) != 0) { 154 pad = BLOCKSIZE - pad; 155 buf_skip((OFFSET) pad); 156 } 157 } else { 158 if (((ar_format == TAR) 159 ? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE)) 160 : buf_skip((OFFSET) sb.sb_size)) < 0) { 161 warn(name, "File data is corrupt"); 162 } 163 } 164 } 165 166 close_archive(); 167 } 168 169 170 171 /* get_header - figures which type of header needs to be read. 172 * 173 * DESCRIPTION 174 * 175 * This is merely a single entry point for the two types of archive 176 * headers which are supported. The correct header is selected 177 * depending on the archive type. 178 * 179 * PARAMETERS 180 * 181 * char *name - name of the file (passed to header routine) 182 * Stat *asb - Stat block for the file (passed to header routine) 183 * 184 * RETURNS 185 * 186 * Returns the value which was returned by the proper header 187 * function. 188 */ 189 190 #ifdef __STDC__ 191 192 int get_header(char *name, Stat *asb) 193 194 #else 195 196 int get_header(name, asb) 197 char *name; 198 Stat *asb; 199 200 #endif 201 { 202 if (ar_format == TAR) { 203 return(readtar(name, asb)); 204 } else { 205 return(readcpio(name, asb)); 206 } 207 } 208 209 210 /* readtar - read a tar header 211 * 212 * DESCRIPTION 213 * 214 * Tar_head read a tar format header from the archive. The name 215 * and asb parameters are modified as appropriate for the file listed 216 * in the header. Name is assumed to be a pointer to an array of 217 * at least PATH_MAX bytes. 218 * 219 * PARAMETERS 220 * 221 * char *name - name of the file for which the header is 222 * for. This is modified and passed back to 223 * the caller. 224 * Stat *asb - Stat block for the file for which the header 225 * is for. The fields of the stat structure are 226 * extracted from the archive header. This is 227 * also passed back to the caller. 228 * 229 * RETURNS 230 * 231 * Returns 0 if a valid header was found, or -1 if EOF is 232 * encountered. 233 */ 234 235 #ifdef __STDC__ 236 237 static int readtar(char *name, Stat *asb) 238 239 #else 240 241 static int readtar(name, asb) 242 char *name; 243 Stat *asb; 244 245 #endif 246 { 247 int status = 3; /* Initial status at start of archive */ 248 static int prev_status; 249 250 for (;;) { 251 prev_status = status; 252 status = read_header(name, asb); 253 switch (status) { 254 case 1: /* Valid header */ 255 return(0); 256 case 0: /* Invalid header */ 257 switch (prev_status) { 258 case 3: /* Error on first record */ 259 warn(ar_file, "This doesn't look like a tar archive"); 260 /* FALLTHRU */ 261 case 2: /* Error after record of zeroes */ 262 case 1: /* Error after header rec */ 263 warn(ar_file, "Skipping to next file..."); 264 /* FALLTHRU */ 265 default: 266 case 0: /* Error after error */ 267 break; 268 } 269 break; 270 271 case 2: /* Record of zeroes */ 272 case EOF: /* End of archive */ 273 default: 274 return(-1); 275 } 276 } 277 } 278 279 280 /* readcpio - read a CPIO header 281 * 282 * DESCRIPTION 283 * 284 * Read in a cpio header. Understands how to determine and read ASCII, 285 * binary and byte-swapped binary headers. Quietly translates 286 * old-fashioned binary cpio headers (and arranges to skip the possible 287 * alignment byte). Returns zero if successful, -1 upon archive trailer. 288 * 289 * PARAMETERS 290 * 291 * char *name - name of the file for which the header is 292 * for. This is modified and passed back to 293 * the caller. 294 * Stat *asb - Stat block for the file for which the header 295 * is for. The fields of the stat structure are 296 * extracted from the archive header. This is 297 * also passed back to the caller. 298 * 299 * RETURNS 300 * 301 * Returns 0 if a valid header was found, or -1 if EOF is 302 * encountered. 303 */ 304 305 #ifdef __STDC__ 306 307 static int readcpio(char *name, Stat *asb) 308 309 #else 310 311 static int readcpio(name, asb) 312 char *name; 313 Stat *asb; 314 315 #endif 316 { 317 OFFSET skipped; 318 char magic[M_STRLEN]; 319 static int align; 320 321 if (align > 0) { 322 buf_skip((OFFSET) align); 323 } 324 align = 0; 325 for (;;) { 326 buf_read(magic, M_STRLEN); 327 skipped = 0; 328 while ((align = inascii(magic, name, asb)) < 0 329 && (align = inbinary(magic, name, asb)) < 0 330 && (align = inswab(magic, name, asb)) < 0) { 331 if (++skipped == 1) { 332 if (total - sizeof(magic) == 0) { 333 fatal("Unrecognizable archive"); 334 } 335 warnarch("Bad magic number", (OFFSET) sizeof(magic)); 336 if (name[0]) { 337 warn(name, "May be corrupt"); 338 } 339 } 340 memcpy(magic, magic + 1, sizeof(magic) - 1); 341 buf_read(magic + sizeof(magic) - 1, 1); 342 } 343 if (skipped) { 344 warnarch("Apparently resynchronized", (OFFSET) sizeof(magic)); 345 warn(name, "Continuing"); 346 } 347 if (strcmp(name, TRAILER) == 0) { 348 return (-1); 349 } 350 if (nameopt(name) >= 0) { 351 break; 352 } 353 buf_skip((OFFSET) asb->sb_size + align); 354 } 355 #ifdef S_IFLNK 356 if ((asb->sb_mode & S_IFMT) == S_IFLNK) { 357 if (buf_read(asb->sb_link, (uint) asb->sb_size) < 0) { 358 warn(name, "Corrupt symbolic link"); 359 return (readcpio(name, asb)); 360 } 361 asb->sb_link[asb->sb_size] = '\0'; 362 asb->sb_size = 0; 363 } 364 #endif /* S_IFLNK */ 365 366 /* destroy absolute pathnames for security reasons */ 367 if (name[0] == '/') { 368 if (name[1]) { 369 while (name[0] = name[1]) { 370 ++name; 371 } 372 } else { 373 name[0] = '.'; 374 } 375 } 376 asb->sb_atime = asb->sb_ctime = asb->sb_mtime; 377 if (asb->sb_nlink > 1) { 378 linkto(name, asb); 379 } 380 return (0); 381 } 382 383 384 /* inswab - read a reversed by order binary header 385 * 386 * DESCRIPTIONS 387 * 388 * Reads a byte-swapped CPIO binary archive header 389 * 390 * PARMAMETERS 391 * 392 * char *magic - magic number to match 393 * char *name - name of the file which is stored in the header. 394 * (modified and passed back to caller). 395 * Stat *asb - stat block for the file (modified and passed back 396 * to the caller). 397 * 398 * 399 * RETURNS 400 * 401 * Returns the number of trailing alignment bytes to skip; -1 if 402 * unsuccessful. 403 * 404 */ 405 406 #ifdef __STDC__ 407 408 static int inswab(char *magic, char *name, Stat *asb) 409 410 #else 411 412 static int inswab(magic, name, asb) 413 char *magic; 414 char *name; 415 Stat *asb; 416 417 #endif 418 { 419 ushort namesize; 420 uint namefull; 421 Binary binary; 422 423 if (*((ushort *) magic) != SWAB(M_BINARY)) { 424 return (-1); 425 } 426 memcpy((char *) &binary, 427 magic + sizeof(ushort), 428 M_STRLEN - sizeof(ushort)); 429 if (buf_read((char *) &binary + M_STRLEN - sizeof(ushort), 430 sizeof(binary) - (M_STRLEN - sizeof(ushort))) < 0) { 431 warnarch("Corrupt swapped header", 432 (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort))); 433 return (-1); 434 } 435 asb->sb_dev = (dev_t) SWAB(binary.b_dev); 436 asb->sb_ino = (ino_t) SWAB(binary.b_ino); 437 asb->sb_mode = SWAB(binary.b_mode); 438 asb->sb_uid = SWAB(binary.b_uid); 439 asb->sb_gid = SWAB(binary.b_gid); 440 asb->sb_nlink = SWAB(binary.b_nlink); 441 #ifndef _POSIX_SOURCE 442 asb->sb_rdev = (dev_t) SWAB(binary.b_rdev); 443 #endif 444 asb->sb_mtime = SWAB(binary.b_mtime[0]) << 16 | SWAB(binary.b_mtime[1]); 445 asb->sb_size = SWAB(binary.b_size[0]) << 16 | SWAB(binary.b_size[1]); 446 if ((namesize = SWAB(binary.b_name)) == 0 || namesize >= PATH_MAX) { 447 warnarch("Bad swapped pathname length", 448 (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort))); 449 return (-1); 450 } 451 if (buf_read(name, namefull = namesize + namesize % 2) < 0) { 452 warnarch("Corrupt swapped pathname", (OFFSET) namefull); 453 return (-1); 454 } 455 if (name[namesize - 1] != '\0') { 456 warnarch("Bad swapped pathname", (OFFSET) namefull); 457 return (-1); 458 } 459 return (asb->sb_size % 2); 460 } 461 462 463 /* inascii - read in an ASCII cpio header 464 * 465 * DESCRIPTION 466 * 467 * Reads an ASCII format cpio header 468 * 469 * PARAMETERS 470 * 471 * char *magic - magic number to match 472 * char *name - name of the file which is stored in the header. 473 * (modified and passed back to caller). 474 * Stat *asb - stat block for the file (modified and passed back 475 * to the caller). 476 * 477 * RETURNS 478 * 479 * Returns zero if successful; -1 otherwise. Assumes that the entire 480 * magic number has been read. 481 */ 482 483 #ifdef __STDC__ 484 485 static int inascii(char *magic, char *name, Stat *asb) 486 487 #else 488 489 static int inascii(magic, name, asb) 490 char *magic; 491 char *name; 492 Stat *asb; 493 494 #endif 495 { 496 uint namelen; 497 char header[H_STRLEN + 1]; 498 #ifdef _POSIX_SOURCE 499 dev_t dummyrdev; 500 #endif 501 502 if (strncmp(magic, M_ASCII, M_STRLEN) != 0) { 503 return (-1); 504 } 505 if (buf_read(header, H_STRLEN) < 0) { 506 warnarch("Corrupt ASCII header", (OFFSET) H_STRLEN); 507 return (-1); 508 } 509 header[H_STRLEN] = '\0'; 510 if (sscanf(header, H_SCAN, &asb->sb_dev, 511 &asb->sb_ino, &asb->sb_mode, &asb->sb_uid, 512 #ifdef _POSIX_SOURCE 513 &asb->sb_gid, &asb->sb_nlink, &dummyrdev, 514 #else 515 &asb->sb_gid, &asb->sb_nlink, &asb->sb_rdev, 516 #endif 517 &asb->sb_mtime, &namelen, &asb->sb_size) != H_COUNT) { 518 warnarch("Bad ASCII header", (OFFSET) H_STRLEN); 519 return (-1); 520 } 521 if (namelen == 0 || namelen >= PATH_MAX) { 522 warnarch("Bad ASCII pathname length", (OFFSET) H_STRLEN); 523 return (-1); 524 } 525 if (buf_read(name, namelen) < 0) { 526 warnarch("Corrupt ASCII pathname", (OFFSET) namelen); 527 return (-1); 528 } 529 if (name[namelen - 1] != '\0') { 530 warnarch("Bad ASCII pathname", (OFFSET) namelen); 531 return (-1); 532 } 533 return (0); 534 } 535 536 537 /* inbinary - read a binary header 538 * 539 * DESCRIPTION 540 * 541 * Reads a CPIO format binary header. 542 * 543 * PARAMETERS 544 * 545 * char *magic - magic number to match 546 * char *name - name of the file which is stored in the header. 547 * (modified and passed back to caller). 548 * Stat *asb - stat block for the file (modified and passed back 549 * to the caller). 550 * 551 * RETURNS 552 * 553 * Returns the number of trailing alignment bytes to skip; -1 if 554 * unsuccessful. 555 */ 556 557 #ifdef __STDC__ 558 559 static int inbinary(char *magic, char *name, Stat *asb) 560 561 #else 562 563 static int inbinary(magic, name, asb) 564 char *magic; 565 char *name; 566 Stat *asb; 567 568 #endif 569 { 570 uint namefull; 571 Binary binary; 572 573 if (*((ushort *) magic) != M_BINARY) { 574 return (-1); 575 } 576 memcpy((char *) &binary, 577 magic + sizeof(ushort), 578 M_STRLEN - sizeof(ushort)); 579 if (buf_read((char *) &binary + M_STRLEN - sizeof(ushort), 580 sizeof(binary) - (M_STRLEN - sizeof(ushort))) < 0) { 581 warnarch("Corrupt binary header", 582 (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort))); 583 return (-1); 584 } 585 asb->sb_dev = binary.b_dev; 586 asb->sb_ino = binary.b_ino; 587 asb->sb_mode = binary.b_mode; 588 asb->sb_uid = binary.b_uid; 589 asb->sb_gid = binary.b_gid; 590 asb->sb_nlink = binary.b_nlink; 591 #ifndef _POSIX_SOURCE 592 asb->sb_rdev = binary.b_rdev; 593 #endif 594 asb->sb_mtime = binary.b_mtime[0] << 16 | binary.b_mtime[1]; 595 asb->sb_size = binary.b_size[0] << 16 | binary.b_size[1]; 596 if (binary.b_name == 0 || binary.b_name >= PATH_MAX) { 597 warnarch("Bad binary pathname length", 598 (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort))); 599 return (-1); 600 } 601 if (buf_read(name, namefull = binary.b_name + binary.b_name % 2) < 0) { 602 warnarch("Corrupt binary pathname", (OFFSET) namefull); 603 return (-1); 604 } 605 if (name[binary.b_name - 1] != '\0') { 606 warnarch("Bad binary pathname", (OFFSET) namefull); 607 return (-1); 608 } 609 return (asb->sb_size % 2); 610 } 611