1 /*- 2 * Copyright (c) 1992 Keith Muller. 3 * Copyright (c) 1992 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Keith Muller of the University of California, San Diego. 8 * 9 * %sccs.include.redist.c% 10 */ 11 12 #ifndef lint 13 static char sccsid[] = "@(#)ar_subs.c 1.1 (Berkeley) 12/21/92"; 14 #endif /* not lint */ 15 16 #include <sys/types.h> 17 #include <sys/time.h> 18 #include <sys/stat.h> 19 #include <sys/param.h> 20 #include <signal.h> 21 #include <string.h> 22 #include <stdio.h> 23 #include <ctype.h> 24 #include <fcntl.h> 25 #include <errno.h> 26 #include <unistd.h> 27 #include <stdlib.h> 28 #include "pax.h" 29 #include "extern.h" 30 31 static void wr_archive __P((register ARCHD *)); 32 static int get_arc __P((void)); 33 static int next_head __P((register ARCHD *)); 34 extern sigset_t s_mask; 35 36 /* 37 * Routines which control the overall operation modes of pax as specified by 38 * the user: list, append, read ... 39 */ 40 41 static char hdbuf[BLKMULT]; /* space for archive header on read */ 42 u_long flcnt; /* number of files processed */ 43 44 /* 45 * list() 46 * list the contents of an archive which match user supplied pattern(s) 47 * (no pattern matches all). 48 */ 49 50 #if __STDC__ 51 void 52 list(void) 53 #else 54 void 55 list() 56 #endif 57 { 58 register ARCHD *arcn; 59 register int res; 60 ARCHD archd; 61 time_t now; 62 63 arcn = &archd; 64 /* 65 * figure out archive type; pass any format specific options to the 66 * archive option processing routine; call the format init routine. We 67 * also save current time for ls_list() so we do not make a system 68 * call for each file we need to print. If verbose (vflag) start up 69 * the name and group caches. 
70 */ 71 if ((get_arc() < 0) || ((*frmt->options)() < 0) || 72 ((*frmt->st_rd)() < 0)) 73 return; 74 75 if (vflag && ((uidtb_start() < 0) || (gidtb_start() < 0))) 76 return; 77 78 now = time((time_t *)NULL); 79 80 /* 81 * step through the archive until the format says it is done 82 */ 83 while (next_head(arcn) == 0) { 84 /* 85 * check for pattern, and user specified options match. 86 * When all patterns are matched we are done. 87 */ 88 if ((res = pat_match(arcn)) < 0) 89 break; 90 91 if ((res == 0) && (sel_chk(arcn) == 0)) { 92 /* 93 * pattern resulted in a selected file 94 */ 95 if (pat_sel(arcn) < 0) 96 break; 97 98 /* 99 * modify the name as requested by the user if name 100 * survives modification, do a listing of the file 101 */ 102 if ((res = mod_name(arcn)) < 0) 103 break; 104 if (res == 0) 105 ls_list(arcn, now); 106 } 107 108 /* 109 * skip to next archive format header using values calculated 110 * by the format header read routine 111 */ 112 if (rd_skip(arcn->skip + arcn->pad) == 1) 113 break; 114 } 115 116 /* 117 * all done, let format have a chance to cleanup, and make sure that 118 * the patterns supplied by the user were all matched 119 */ 120 (void)(*frmt->end_rd)(); 121 (void)sigprocmask(SIG_BLOCK, &s_mask, (sigset_t *)NULL); 122 ar_close(); 123 pat_chk(); 124 } 125 126 /* 127 * extract() 128 * extract the member(s) of an archive as specified by user supplied 129 * pattern(s) (no patterns extracts all members) 130 */ 131 132 #if __STDC__ 133 void 134 extract(void) 135 #else 136 void 137 extract() 138 #endif 139 { 140 register ARCHD *arcn; 141 register int res; 142 off_t cnt; 143 ARCHD archd; 144 struct stat sb; 145 int fd; 146 147 arcn = &archd; 148 /* 149 * figure out archive type; pass any format specific options to the 150 * archive option processing routine; call the format init routine; 151 * start up the directory modification time and access mode database 152 */ 153 if ((get_arc() < 0) || ((*frmt->options)() < 0) || 154 ((*frmt->st_rd)() < 
0) || (dir_start() < 0)) 155 return; 156 157 /* 158 * When we are doing interactive rename, we store the mapping of names 159 * so we can fix up hard links files later in the archive. 160 */ 161 if (iflag && (name_start() < 0)) 162 return; 163 164 /* 165 * step through each entry on the archive until the format read routine 166 * says it is done 167 */ 168 while (next_head(arcn) == 0) { 169 170 /* 171 * check for pattern, and user specified options match. When 172 * all the patterns are matched we are done 173 */ 174 if ((res = pat_match(arcn)) < 0) 175 break; 176 177 if ((res > 0) || (sel_chk(arcn) != 0)) { 178 /* 179 * file is not selected. skip past any file data and 180 * padding and go back for the next archive member 181 */ 182 (void)rd_skip(arcn->skip + arcn->pad); 183 continue; 184 } 185 186 /* 187 * with -u, only extract if the archive member is newer than 188 * the file with the same name in the file system (no test of 189 * being the same type is required). 190 * NOTE: this test is done BEFORE name modifications as 191 * specified by pax. this operation can be confusing to the 192 * user who might expect the test to be done on an existing 193 * file AFTER the name mod. In honesty the pax spec is probably 194 * flawed in this respect. 195 */ 196 if (uflag && (lstat(arcn->name, &sb) == 0) && 197 (arcn->sb.st_mtime <= sb.st_mtime)) { 198 (void)rd_skip(arcn->skip + arcn->pad); 199 continue; 200 } 201 202 /* 203 * this archive member is now been selected. modify the name. 204 */ 205 if (pat_sel(arcn) < 0) 206 break; 207 if ((res = mod_name(arcn)) < 0) 208 break; 209 if (res > 0) { 210 /* 211 * a bad name mod, skip and purge name from link table 212 */ 213 purg_lnk(arcn); 214 (void)rd_skip(arcn->skip + arcn->pad); 215 continue; 216 } 217 218 /* 219 * If the user asked for -Z they want a time check done after 220 * the name mod. 
221 */ 222 if (Zflag && (lstat(arcn->name, &sb) == 0) && 223 (arcn->sb.st_mtime <= sb.st_mtime)) { 224 (void)rd_skip(arcn->skip + arcn->pad); 225 continue; 226 } 227 228 if (vflag) { 229 (void)fputs(arcn->name, stderr); 230 vfpart = 1; 231 } 232 233 /* 234 * all ok, extract this member based on type 235 */ 236 if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) { 237 /* 238 * process archive members that are not regular files. 239 * throw out padding and any data that might follow the 240 * header (as determined by the format). 241 */ 242 if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) 243 res = lnk_creat(arcn); 244 else 245 res = node_creat(arcn); 246 247 (void)rd_skip(arcn->skip + arcn->pad); 248 if (res < 0) 249 purg_lnk(arcn); 250 251 if (vflag && vfpart) { 252 (void)putc('\n', stderr); 253 vfpart = 0; 254 } 255 continue; 256 } 257 /* 258 * we have a file with data here. If we can not create it, skip 259 * over the data and purge the name from hard link table 260 */ 261 if ((fd = file_creat(arcn)) < 0) { 262 (void)rd_skip(arcn->skip + arcn->pad); 263 purg_lnk(arcn); 264 continue; 265 } 266 /* 267 * extract the file from the archive and skip over padding and 268 * any unprocessed data 269 */ 270 res = (*frmt->rd_data)(arcn, fd, &cnt); 271 file_close(arcn, fd); 272 if (vflag && vfpart) { 273 (void)putc('\n', stderr); 274 vfpart = 0; 275 } 276 if (!res) 277 (void)rd_skip(cnt + arcn->pad); 278 } 279 280 /* 281 * all done, restore directory modes and times as required; make sure 282 * all patterns supplied by the user were matched; block off signals 283 * to avoid chance for multiple entry into the cleanup code. 284 */ 285 (void)(*frmt->end_rd)(); 286 (void)sigprocmask(SIG_BLOCK, &s_mask, (sigset_t *)NULL); 287 ar_close(); 288 proc_dir(); 289 pat_chk(); 290 } 291 292 /* 293 * wr_archive() 294 * Write an archive. used in both creating a new archive and appends on 295 * previously written archive. 
296 */ 297 298 #if __STDC__ 299 static void 300 wr_archive(register ARCHD *arcn) 301 #else 302 static void 303 wr_archive(arcn) 304 register ARCHD *arcn; 305 #endif 306 { 307 register int res; 308 register int hlk; 309 off_t cnt; 310 int (*wrf)(); 311 int fd = -1; 312 313 /* 314 * if this format supports hard link storage, start up the database 315 * that detects them. 316 */ 317 if (((hlk = frmt->hlk) == 1) && (lnk_start() < 0)) 318 return; 319 320 /* 321 * start up the file traversal code and format specific write 322 */ 323 if ((ftree_start() < 0) || ((*frmt->st_wr)() < 0)) 324 return; 325 wrf = frmt->wr; 326 327 /* 328 * When we are doing interactive rename, we store the mapping of names 329 * so we can fix up hard links files later in the archive. 330 */ 331 if (iflag && (name_start() < 0)) 332 return; 333 334 /* 335 * while there are files to archive, process them one at at time 336 */ 337 while (next_file(arcn) == 0) { 338 /* 339 * check if this file meets user specified options match. 340 */ 341 if (sel_chk(arcn) != 0) 342 continue; 343 fd = -1; 344 if (uflag) { 345 /* 346 * only archive if this file is newer than a file with 347 * the same name that is already stored on the archive 348 */ 349 if ((res = chk_ftime(arcn)) < 0) 350 break; 351 if (res > 0) 352 continue; 353 } 354 355 /* 356 * this file is considered selected now. see if this is a hard 357 * link to a file already stored 358 */ 359 ftree_sel(arcn); 360 if (hlk && (chk_lnk(arcn) < 0)) 361 break; 362 363 if ((arcn->type == PAX_REG) || (arcn->type == PAX_HRG) || 364 (arcn->type == PAX_CTG)) { 365 /* 366 * we will have to read this file. by opening it now we 367 * can avoid writing a header to the archive for a file 368 * we were later unable to read (we also purge it from 369 * the link table). 
370 */ 371 if ((fd = open(arcn->org_name, O_RDONLY, 0)) < 0) { 372 syswarn(1,errno, "Unable to open %s to read", 373 arcn->org_name); 374 purg_lnk(arcn); 375 continue; 376 } 377 } 378 379 /* 380 * Now modify the name as requested by the user 381 */ 382 if ((res = mod_name(arcn)) < 0) { 383 /* 384 * name modification says to skip this file, close the 385 * file and purge link table entry 386 */ 387 rdfile_close(arcn, &fd); 388 purg_lnk(arcn); 389 break; 390 } 391 392 if ((res > 0) || (docrc && (set_crc(arcn, fd) < 0))) { 393 /* 394 * unable to obtain the crc we need, close the file, 395 * purge link table entry 396 */ 397 rdfile_close(arcn, &fd); 398 purg_lnk(arcn); 399 continue; 400 } 401 402 if (vflag) { 403 (void)fputs(arcn->name, stderr); 404 vfpart = 1; 405 } 406 ++flcnt; 407 408 /* 409 * looks safe to store the file, have the format specific 410 * routine write routine store the file header on the archive 411 */ 412 if ((res = (*wrf)(arcn)) < 0) { 413 rdfile_close(arcn, &fd); 414 break; 415 } 416 if (res > 0) { 417 /* 418 * format write says no file data needs to be stored 419 * so we are done messing with this file 420 */ 421 if (vflag && vfpart) { 422 (void)putc('\n', stderr); 423 vfpart = 0; 424 } 425 rdfile_close(arcn, &fd); 426 continue; 427 } 428 429 /* 430 * Add file data to the archive, quit on write error. if we 431 * cannot write the entire file contents to the archive we 432 * must pad the archive to replace the missing file data 433 * (otherwise during an extract the file header for the file 434 * which FOLLOWS this one will not be where we expect it to 435 * be). 
436 */ 437 res = (*frmt->wr_data)(arcn, fd, &cnt); 438 rdfile_close(arcn, &fd); 439 if (vflag && vfpart) { 440 (void)putc('\n', stderr); 441 vfpart = 0; 442 } 443 if (res < 0) 444 break; 445 446 /* 447 * pad as required, cnt is number of bytes not written 448 */ 449 if (((cnt > 0) && (wr_skip(cnt) < 0)) || 450 ((arcn->pad > 0) && (wr_skip(arcn->pad) < 0))) 451 break; 452 } 453 454 /* 455 * tell format to write trailer; pad to block boundry; reset directory 456 * mode/access times, and check if all patterns supplied by the user 457 * were matched. block off signals to avoid chance for multiple entry 458 * into the cleanup code 459 */ 460 (*frmt->end_wr)(); 461 wr_fin(); 462 (void)sigprocmask(SIG_BLOCK, &s_mask, (sigset_t *)NULL); 463 ar_close(); 464 if (tflag) 465 proc_dir(); 466 ftree_chk(); 467 } 468 469 /* 470 * append() 471 * Add file to previously written archive. Archive format specified by the 472 * user must agree with archive. The archive is read first to collect 473 * modification times (if -u) and locate the archive trailer. The archive 474 * is positioned in front of the record with the trailer and wr_archive() 475 * is called to add the new members. 476 * PAX IMPLEMENTATION DETAIL NOTE: 477 * -u is implemented by adding the new members to the end of the archive. 478 * Care is taken so that these do not end up as links to the older 479 * version of the same file already stored in the archive. It is expected 480 * when extraction occurs these newer versions will over-write the older 481 * ones stored "earlier" in the archive (this may be a bad assumption as 482 * it depends on the implementation of the program doing the extraction). 483 * It is really difficult to splice in members without either re-writing 484 * the entire archive (from the point were the old version was), or having 485 * assistance of the format specification in terms of a special update 486 * header that invalidates a previous archive record. 
The posix spec left 487 * the method used to implement -u unspecified. This pax is able to 488 * over write existing files that it creates. 489 */ 490 491 #if __STDC__ 492 void 493 append(void) 494 #else 495 void 496 append() 497 #endif 498 { 499 register ARCHD *arcn; 500 register int res; 501 ARCHD archd; 502 FSUB *orgfrmt; 503 int udev; 504 off_t tlen; 505 506 arcn = &archd; 507 orgfrmt = frmt; 508 509 /* 510 * Do not allow an append operation if the actual archive is of a 511 * different format than the user specified foramt. 512 */ 513 if (get_arc() < 0) 514 return; 515 if ((orgfrmt != NULL) && (orgfrmt != frmt)) { 516 warn(1, "Cannot mix current archive format %s with %s", 517 frmt->name, orgfrmt->name); 518 return; 519 } 520 521 /* 522 * pass the format any options and start up format 523 */ 524 if (((*frmt->options)() < 0) || ((*frmt->st_rd)() < 0)) 525 return; 526 527 /* 528 * if we only are adding members that are newer, we need to save the 529 * mod times for all files we see. 530 */ 531 if (uflag && (ftime_start() < 0)) 532 return; 533 534 /* 535 * some archive formats encode hard links by recording the device and 536 * file serial number (inode) but copy the file anyway (multiple times) 537 * to the archive. When we append, we run the risk that newly added 538 * files may have the same device and inode numbers as those recorded 539 * on the archive but during a previous run. If this happens, when the 540 * archive is extracted we get INCORRECT hard links. We avoid this by 541 * remapping the device numbers so that newly added files will never 542 * use the same device number as one found on the archive. remapping 543 * allows new members to safely have links among themselves. remapping 544 * also avoids problems with file inode (serial number) truncations 545 * when the inode number is larger than storage space in the archive 546 * header. See the remap routines for more details. 
547 */ 548 if ((udev = frmt->udev) && (dev_start() < 0)) 549 return; 550 551 /* 552 * step through the archive until the format says it is done 553 */ 554 while (next_head(arcn) == 0) { 555 /* 556 * check if this file meets user specified options. 557 */ 558 if (sel_chk(arcn) != 0) { 559 if (rd_skip(arcn->skip + arcn->pad) == 1) 560 break; 561 continue; 562 } 563 if (uflag) { 564 /* 565 * see if this is the newest version of this file has 566 * already been seen, if so skip. 567 */ 568 if ((res = chk_ftime(arcn)) < 0) 569 break; 570 if (res > 0) { 571 if (rd_skip(arcn->skip + arcn->pad) == 1) 572 break; 573 continue; 574 } 575 } 576 577 /* 578 * Store this device number. Device numbers seen during the 579 * read phase of append will cause newly appended files with a 580 * device number seen in the old part of the archive to be 581 * remapped to an unused device number. 582 */ 583 if ((udev && (add_dev(arcn) < 0)) || 584 (rd_skip(arcn->skip + arcn->pad) == 1)) 585 break; 586 587 ++flcnt; 588 } 589 590 /* 591 * done, finish up read and get the number of bytes to back up so we 592 * can add new members. The format might have used the hard link table, 593 * purge it. 594 */ 595 tlen = (*frmt->end_rd)(); 596 lnk_end(); 597 598 /* 599 * try to postion for write, if this fails quit. 
if any error occurs, 600 * we will refuse to write 601 */ 602 if ((appnd_start(tlen) < 0) || (exit_val != 0)) 603 return; 604 605 /* 606 * go to the writing phase to add the new members 607 */ 608 wr_archive(arcn); 609 } 610 611 /* 612 * archive() 613 * write a new archive 614 */ 615 616 #if __STDC__ 617 void 618 archive(void) 619 #else 620 void 621 archive() 622 #endif 623 { 624 ARCHD archd; 625 626 /* 627 * if we only are adding members that are newer, we need to save the 628 * mod times for all files; set up for writing; pass the format any 629 * options write the archive 630 */ 631 if ((uflag && (ftime_start() < 0)) || (wr_start() < 0)) 632 return; 633 if ((*frmt->options)() < 0) 634 return; 635 636 wr_archive(&archd); 637 } 638 639 /* 640 * copy() 641 * copy files from one part of the file system to another. this does not 642 * use any archive storage. The EFFECT OF THE COPY IS THE SAME as if an 643 * archive was written and then extracted in the destination directory 644 * (except the files are forced to be under the destination directory). 645 */ 646 647 #if __STDC__ 648 void 649 copy(void) 650 #else 651 void 652 copy() 653 #endif 654 { 655 register ARCHD *arcn; 656 register int res; 657 register int fddest; 658 register char *dest_pt; 659 register int dlen; 660 register int drem; 661 int fdsrc = -1; 662 struct stat sb; 663 ARCHD archd; 664 char dirbuf[PAXPATHLEN+1]; 665 666 arcn = &archd; 667 /* 668 * set up the destination dir path and make sure it is a directory. 
We 669 * make sure we have a trailing / on the destination 670 */ 671 dlen = l_strncpy(dirbuf, dirptr, PAXPATHLEN); 672 dest_pt = dirbuf + dlen; 673 if (*(dest_pt-1) != '/') { 674 *dest_pt++ = '/'; 675 ++dlen; 676 } 677 *dest_pt = '\0'; 678 drem = PAXPATHLEN - dlen; 679 680 if (stat(dirptr, &sb) < 0) { 681 syswarn(1, errno, "Cannot access destination directory %s", 682 dirbuf); 683 return; 684 } 685 if (!S_ISDIR(sb.st_mode)) { 686 warn(1, "Destination is not a directory %s", dirbuf); 687 return; 688 } 689 690 /* 691 * start up the hard link table; file traversal routines and the 692 * modification time and access mode database 693 */ 694 if ((lnk_start() < 0) || (ftree_start() < 0) || (dir_start() < 0)) 695 return; 696 697 /* 698 * When we are doing interactive rename, we store the mapping of names 699 * so we can fix up hard links files later in the archive. 700 */ 701 if (iflag && (name_start() < 0)) 702 return; 703 704 /* 705 * set up to cp file trees 706 */ 707 cp_start(); 708 709 /* 710 * while there are files to archive, process them 711 */ 712 while (next_file(arcn) == 0) { 713 fdsrc = -1; 714 715 /* 716 * check if this file meets user specified options 717 */ 718 if (sel_chk(arcn) != 0) 719 continue; 720 721 /* 722 * if there is already a file in the destination directory with 723 * the same name and it is newer, skip the one stored on the 724 * archive. 725 * NOTE: this test is done BEFORE name modifications as 726 * specified by pax. this can be confusing to the user who 727 * might expect the test to be done on an existing file AFTER 728 * the name mod. 
In honesty the pax spec is probably flawed in 729 * this respect 730 */ 731 if (uflag) { 732 /* 733 * create the destination name 734 */ 735 if (*(arcn->name) == '/') 736 res = 1; 737 else 738 res = 0; 739 if ((arcn->nlen - res) > drem) { 740 warn(1, "Destination pathname too long %s", 741 arcn->name); 742 continue; 743 } 744 (void)strncpy(dest_pt, arcn->name + res, drem); 745 dirbuf[PAXPATHLEN] = '\0'; 746 747 /* 748 * if existing file is same age or newer skip 749 */ 750 res = lstat(dirbuf, &sb); 751 *dest_pt = '\0'; 752 753 if ((res == 0) && (arcn->sb.st_mtime <= sb.st_mtime)) 754 continue; 755 } 756 757 /* 758 * this file is considered selected. See if this is a hard link 759 * to a previous file; modify the name as requested by the 760 * user; set the final destination. 761 */ 762 ftree_sel(arcn); 763 if (chk_lnk(arcn) < 0) 764 break; 765 if ((res = mod_name(arcn)) < 0) 766 break; 767 if ((res > 0) || (set_dest(arcn, dirbuf, dlen) < 0)) { 768 /* 769 * skip file, purge from link table 770 */ 771 purg_lnk(arcn); 772 continue; 773 } 774 775 /* 776 * Non standard -Z flag. When the exisiting file is 777 * same age or newer skip 778 */ 779 if (Zflag && (lstat(arcn->name, &sb) == 0) && 780 (arcn->sb.st_mtime <= sb.st_mtime)) 781 continue; 782 783 if (vflag) { 784 (void)fputs(arcn->name, stderr); 785 vfpart = 1; 786 } 787 ++flcnt; 788 789 /* 790 * try to create a hard link to the src file if requested 791 * but make sure we are not trying to overwrite ourselves. 
792 */ 793 if (lflag) 794 res = cross_lnk(arcn); 795 else 796 res = chk_same(arcn); 797 if (res <= 0) { 798 if (vflag && vfpart) { 799 (void)putc('\n', stderr); 800 vfpart = 0; 801 } 802 continue; 803 } 804 805 /* 806 * have to create a new file 807 */ 808 if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) { 809 /* 810 * create a link or special file 811 */ 812 if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) 813 res = lnk_creat(arcn); 814 else 815 res = node_creat(arcn); 816 if (res < 0) 817 purg_lnk(arcn); 818 if (vflag && vfpart) { 819 (void)putc('\n', stderr); 820 vfpart = 0; 821 } 822 continue; 823 } 824 825 /* 826 * have to copy a regular file to the destination directory. 827 * first open source file and then create the destination file 828 */ 829 if ((fdsrc = open(arcn->org_name, O_RDONLY, 0)) < 0) { 830 syswarn(1, errno, "Unable to open %s to read", 831 arcn->org_name); 832 purg_lnk(arcn); 833 continue; 834 } 835 if ((fddest = file_creat(arcn)) < 0) { 836 rdfile_close(arcn, &fdsrc); 837 purg_lnk(arcn); 838 continue; 839 } 840 841 /* 842 * copy source file data to the destination file 843 */ 844 cp_file(arcn, fdsrc, fddest); 845 file_close(arcn, fddest); 846 rdfile_close(arcn, &fdsrc); 847 848 if (vflag && vfpart) { 849 (void)putc('\n', stderr); 850 vfpart = 0; 851 } 852 } 853 854 /* 855 * restore directory modes and times as required; make sure all 856 * patterns were selected block off signals to avoid chance for 857 * multiple entry into the cleanup code. 858 */ 859 (void)sigprocmask(SIG_BLOCK, &s_mask, (sigset_t *)NULL); 860 ar_close(); 861 proc_dir(); 862 ftree_chk(); 863 } 864 865 /* 866 * next_head() 867 * try to find a valid header in the archive. Uses format specific 868 * routines to extract the header and id the trailer. Trailers may be 869 * located within a valid header or in an invalid header (the location 870 * is format specific. The inhead field from the option table tells us 871 * where to look for the trailer). 
 *	We keep reading (and resyncing) until we get enough contiguous data
 *	to check for a header. If we cannot find one, we shift by a byte
 *	add a new byte from the archive to the end of the buffer and try again.
 *	If we get a read error, we throw out what we have (as we must have
 *	contiguous data) and start over again.
 *	Every header returned also bumps the global file count flcnt.
 *	ASSUMED: headers fit within a BLKMULT header.
 * Return:
 *	0 if we got a header, -1 if we are unable to ever find another one
 *	(we reached the end of input, or we reached the limit on retries. see
 *	the specs for rd_wrbuf() for more details). -1 is also returned when
 *	the format trailer routine returns 0, and, when appending (act is
 *	APPND), on the first read error or invalid header.
 */

#if __STDC__
static int
next_head(register ARCHD *arcn)
#else
static int
next_head(arcn)
	register ARCHD *arcn;
#endif
{
	register int ret;
	register char *hdend;
	register int res;
	register int shftsz;
	register int hsz;
	register int in_resync = 0;	/* set when we are in resync mode */
	int cnt = 0;			/* counter for trailer function */

	/*
	 * set up initial conditions, we want a whole frmt->hsz block as we
	 * have no data yet. res is the number of bytes still wanted, hdend
	 * is where in hdbuf they are placed, shftsz is the number of bytes
	 * kept when the buffer is shifted down by one.
	 */
	res = hsz = frmt->hsz;
	hdend = hdbuf;
	shftsz = hsz - 1;
	for(;;) {
		/*
		 * keep looping until we get a contiguous FULL buffer
		 * (frmt->hsz is the proper size)
		 */
		for (;;) {
			if ((ret = rd_wrbuf(hdend, res)) == res)
				break;

			/*
			 * some kind of archive read problem, try to resync the
			 * storage device, better give the user the bad news.
			 * A read of 0 bytes or a failed resync is fatal.
			 */
			if ((ret == 0) || (rd_sync() < 0)) {
				warn(1,"Premature end of file on archive read");
				return(-1);
			}
			if (!in_resync) {
				/*
				 * first trouble seen; an append never tries
				 * to recover
				 */
				if (act == APPND) {
					warn(1,
					  "Archive I/O error, cannot continue");
					return(-1);
				}
				warn(1,"Archive I/O error. Trying to recover.");
				++in_resync;
			}

			/*
			 * oh well, throw it all out and start over
			 */
			res = hsz;
			hdend = hdbuf;
		}

		/*
		 * ok we have a contiguous buffer of the right size. Call the
		 * format read routine. If this was not a valid header and this
		 * format stores trailers outside of the header, call the
		 * format specific trailer routine to check for a trailer. We
		 * have to watch out that we do not mis-identify file data or
		 * block padding as a header or trailer. Format specific
		 * trailer functions must NOT check for the trailer while we
		 * are running in resync mode. Some trailer functions may tell
		 * us that this block cannot contain a valid header either, so
		 * we then throw out the entire block and start over.
		 */
		if ((*frmt->rd)(arcn, hdbuf) == 0)
			break;

		if (!frmt->inhead) {
			/*
			 * this format has trailers outside of valid headers.
			 * a return of 0 from the trailer routine ends the
			 * search.
			 */
			if ((ret = (*frmt->trail)(hdbuf,in_resync, &cnt)) == 0)
				return(-1);
			if (ret == 1) {
				/*
				 * we are in resync and we were told to throw
				 * the whole block out because none of the
				 * bytes in this block can be used to form a
				 * valid header
				 */
				res = hsz;
				hdend = hdbuf;
				continue;
			}
		}

		/*
		 * Brute force section.
		 * not a valid header. We may be able to find a header yet. So
		 * we shift over by one byte, and set up to read one byte at a
		 * time from the archive and place it at the end of the buffer.
		 * We will keep moving byte at a time until we find a header or
		 * get a read error and have to start over.
		 */
		if (!in_resync) {
			/*
			 * first bad header seen; an append never searches
			 */
			if (act == APPND) {
				warn(1,"Unable to append, archive header flaw");
				return(-1);
			}
			warn(1,"Invalid header, starting valid header search.");
			++in_resync;
		}
		bcopy(hdbuf+1, hdbuf, shftsz);
		res = 1;
		hdend = hdbuf + shftsz;
	}

	/*
	 * ok got a valid header, check for trailer if format encodes it in
	 * the header. NOTE: the parameters are different than trailer routines
	 * which encode trailers outside of the header!
	 */
	if (frmt->inhead && ((*frmt->trail)(arcn) == 0))
		return(-1);
	++flcnt;
	return(0);
}

/*
 * get_arc()
 *	Figure out what format an archive is. Handles archive with flaws by
 *	brute force searches for a legal header in any supported format. The
 *	format id routines have to be careful to NOT mis-identify a format.
 *	On success the global frmt points at the matching fsub[] entry and
 *	the bytes that were examined have been handed to pback().
 *	When appending (act is APPND) no searching is done: the first read
 *	error or unidentified header fails silently.
 *	ASSUMED: headers fit within a BLKMULT header.
 * Return:
 *	0 if archive found -1 otherwise
 */

#if __STDC__
static int
get_arc(void)
#else
static int
get_arc()
#endif
{
	register int i;
	register int hdsz = 0;
	register int res;
	register int minhd = BLKMULT;
	char *hdend;
	int notice = 0;

	/*
	 * find the smallest header size in all archive formats and then set up
	 * to read the archive. (ford[] is terminated by a negative entry.)
	 */
	for (i = 0; ford[i] >= 0; ++i) {
		if (fsub[ford[i]].hsz < minhd)
			minhd = fsub[ford[i]].hsz;
	}
	if (rd_start() < 0)
		return(-1);

	/*
	 * res is the number of bytes asked for on the next read, hdsz the
	 * number of bytes now held in hdbuf and hdend where the next read
	 * is placed.
	 */
	res = BLKMULT;
	hdsz = 0;
	hdend = hdbuf;
	for(;;) {
		for (;;) {
			/*
			 * fill the buffer with at least the smallest header
			 */
			i = rd_wrbuf(hdend, res);
			if (i > 0)
				hdsz += i;
			if (hdsz >= minhd)
				break;

			/*
			 * if we cannot recover from a read error quit
			 */
			if ((i == 0) || (rd_sync() < 0))
				goto out;

			/*
			 * when we get an error none of the data we already
			 * have can be used to create a legal header (we just
			 * got an error in the middle), so we throw it all out
			 * and refill the buffer with fresh data.
			 */
			res = BLKMULT;
			hdsz = 0;
			hdend = hdbuf;
			if (!notice) {
				if (act == APPND)
					return(-1);
				warn(1,"Cannot identify format. Searching...");
				++notice;
			}
		}

		/*
		 * we have at least the size of the smallest header in any
		 * archive format. Look to see if we have a match. The array
		 * ford[] is used to specify the header id order to reduce the
		 * chance of incorrectly id'ing a valid header (some formats
		 * may be subsets of each other and the order would then be
		 * important).
		 */
		for (i = 0; ford[i] >= 0; ++i) {
			if ((*fsub[ford[i]].id)(hdbuf, hdsz) < 0)
				continue;
			frmt = &(fsub[ford[i]]);
			/*
			 * yuck, to avoid slow special case code in the extract
			 * routines, just push this header back as if it was
			 * not seen. We have left extra space at start of the
			 * buffer for this purpose. This is a bit ugly, but
			 * adding all the special case code is far worse.
			 */
			pback(hdbuf, hdsz);
			return(0);
		}

		/*
		 * We have a flawed archive, no match. we start searching, but
		 * we never allow additions to flawed archives
		 */
		if (!notice) {
			if (act == APPND)
				return(-1);
			warn(1, "Cannot identify format. Searching...");
			++notice;
		}

		/*
		 * brute force search for a header that we can id.
		 * we shift through byte at a time. this is slow, but we cannot
		 * determine the nature of the flaw in the archive in a
		 * portable manner. The first byte is dropped, the rest moved
		 * down, and the next read tops the buffer back up to BLKMULT;
		 * when nothing is left the buffer is simply reset.
		 */
		if (--hdsz > 0) {
			bcopy(hdbuf+1, hdbuf, hdsz);
			res = BLKMULT - hdsz;
			hdend = hdbuf + hdsz;
		} else {
			res = BLKMULT;
			hdend = hdbuf;
			hdsz = 0;
		}
	}

    out:
	/*
	 * we cannot find a header, bow, apologize and quit
	 */
	warn(1, "Sorry, unable to determine archive format.");
	return(-1);
}