1 /* $OpenBSD: tar.c,v 1.53 2014/02/19 03:59:47 guenther Exp $ */ 2 /* $NetBSD: tar.c,v 1.5 1995/03/21 09:07:49 cgd Exp $ */ 3 4 /*- 5 * Copyright (c) 1992 Keith Muller. 6 * Copyright (c) 1992, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * Keith Muller of the University of California, San Diego. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 */ 36 37 #include <sys/types.h> 38 #include <sys/time.h> 39 #include <sys/stat.h> 40 #include <ctype.h> 41 #include <errno.h> 42 #include <limits.h> 43 #include <string.h> 44 #include <stdio.h> 45 #include <unistd.h> 46 #include <stdlib.h> 47 #include "pax.h" 48 #include "extern.h" 49 #include "tar.h" 50 51 /* 52 * Routines for reading, writing and header identify of various versions of tar 53 */ 54 55 static size_t expandname(char *, size_t, char **, const char *, size_t); 56 static u_long tar_chksm(char *, int); 57 static char *name_split(char *, int); 58 static int ul_oct(u_long, char *, int, int); 59 static int uqd_oct(u_quad_t, char *, int, int); 60 #ifndef SMALL 61 static int rd_xheader(ARCHD *, char *, off_t, char); 62 #endif 63 64 static uid_t uid_nobody; 65 static uid_t uid_warn; 66 static gid_t gid_nobody; 67 static gid_t gid_warn; 68 69 /* 70 * Routines common to all versions of tar 71 */ 72 73 static int tar_nodir; /* do not write dirs under old tar */ 74 char *gnu_name_string; /* GNU ././@LongLink hackery name */ 75 char *gnu_link_string; /* GNU ././@LongLink hackery link */ 76 77 /* 78 * tar_endwr() 79 * add the tar trailer of two null blocks 80 * Return: 81 * 0 if ok, -1 otherwise (what wr_skip returns) 82 */ 83 84 int 85 tar_endwr(void) 86 { 87 return(wr_skip((off_t)(NULLCNT*BLKMULT))); 88 } 89 90 /* 91 * tar_endrd() 92 * no cleanup needed here, just return size of trailer (for append) 93 * Return: 94 * size of trailer (2 * BLKMULT) 95 */ 96 97 off_t 98 tar_endrd(void) 99 { 100 return((off_t)(NULLCNT*BLKMULT)); 101 } 102 103 /* 104 * tar_trail() 105 * Called to determine if a header block is a valid trailer. We are passed 106 * the block, the in_sync flag (which tells us we are in resync mode; 107 * looking for a valid header), and cnt (which starts at zero) which is 108 * used to count the number of empty blocks we have seen so far. 
109 * Return: 110 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block 111 * could never contain a header. 112 */ 113 114 int 115 tar_trail(ARCHD *ignore, char *buf, int in_resync, int *cnt) 116 { 117 int i; 118 119 /* 120 * look for all zero, trailer is two consecutive blocks of zero 121 */ 122 for (i = 0; i < BLKMULT; ++i) { 123 if (buf[i] != '\0') 124 break; 125 } 126 127 /* 128 * if not all zero it is not a trailer, but MIGHT be a header. 129 */ 130 if (i != BLKMULT) 131 return(-1); 132 133 /* 134 * When given a zero block, we must be careful! 135 * If we are not in resync mode, check for the trailer. Have to watch 136 * out that we do not mis-identify file data as the trailer, so we do 137 * NOT try to id a trailer during resync mode. During resync mode we 138 * might as well throw this block out since a valid header can NEVER be 139 * a block of all 0 (we must have a valid file name). 140 */ 141 if (!in_resync && (++*cnt >= NULLCNT)) 142 return(0); 143 return(1); 144 } 145 146 /* 147 * ul_oct() 148 * convert an unsigned long to an octal string. many oddball field 149 * termination characters are used by the various versions of tar in the 150 * different fields. term selects which kind to use. str is '0' padded 151 * at the front to len. we are unable to use only one format as many old 152 * tar readers are very cranky about this. 
/*
 * ul_oct() parameters and return value:
 *	val  - number to encode
 *	str  - destination field, exactly len bytes are written
 *	len  - size of the field in bytes
 *	term - terminator style placed at the END of the field:
 *	       3: "\0"   2: "\0 " (NUL then blank)   1: " "
 *	       0 or anything else: " \0" (blank then NUL)
 * Return:
 *	0 if the number fit into the string, -1 otherwise (including a field
 *	too short to hold the requested terminator, in which case nothing is
 *	written)
 */

static int
ul_oct(u_long val, char *str, int len, int term)
{
	int pos;	/* next byte to fill, walking right to left */
	int tlen;	/* bytes used by the terminator */

	/*
	 * Refuse fields that cannot even hold the terminator; writing it would
	 * land in front of str.
	 */
	tlen = (term == 1 || term == 3) ? 1 : 2;
	if (len < tlen)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	pos = len - 1;
	switch (term) {
	case 3:
		str[pos--] = '\0';
		break;
	case 2:
		str[pos--] = ' ';
		str[pos--] = '\0';
		break;
	case 1:
		str[pos--] = ' ';
		break;
	case 0:
	default:
		str[pos--] = '\0';
		str[pos--] = ' ';
		break;
	}

	/*
	 * Emit octal digits, least significant first. An index is used rather
	 * than a pointer so we never form an address in front of str.
	 */
	while (pos >= 0) {
		str[pos--] = '0' + (char)(val & 0x7);
		if ((val = val >> 3) == (u_long)0)
			break;
	}

	/*
	 * pad what is left of the field with '0'
	 */
	while (pos >= 0)
		str[pos--] = '0';

	/*
	 * digits left over means the value did not fit
	 */
	if (val != (u_long)0)
		return(-1);
	return(0);
}

/*
 * uqd_oct()
 *	convert an u_quad_t to an octal string. one of many oddball field
 *	termination characters are used by the various versions of tar in the
 *	different fields. term selects which kind to use. str is '0' padded
 *	at the front to len. we are unable to use only one format as many old
 *	tar readers are very cranky about this.
 */
207 * Return: 208 * 0 if the number fit into the string, -1 otherwise 209 */ 210 211 static int 212 uqd_oct(u_quad_t val, char *str, int len, int term) 213 { 214 char *pt; 215 216 /* 217 * term selects the appropriate character(s) for the end of the string 218 */ 219 pt = str + len - 1; 220 switch (term) { 221 case 3: 222 *pt-- = '\0'; 223 break; 224 case 2: 225 *pt-- = ' '; 226 *pt-- = '\0'; 227 break; 228 case 1: 229 *pt-- = ' '; 230 break; 231 case 0: 232 default: 233 *pt-- = '\0'; 234 *pt-- = ' '; 235 break; 236 } 237 238 /* 239 * convert and blank pad if there is space 240 */ 241 while (pt >= str) { 242 *pt-- = '0' + (char)(val & 0x7); 243 if ((val = val >> 3) == 0) 244 break; 245 } 246 247 while (pt >= str) 248 *pt-- = '0'; 249 if (val != (u_quad_t)0) 250 return(-1); 251 return(0); 252 } 253 254 /* 255 * tar_chksm() 256 * calculate the checksum for a tar block counting the checksum field as 257 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). 258 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS 259 * pad headers with 0. 260 * Return: 261 * unsigned long checksum 262 */ 263 264 static u_long 265 tar_chksm(char *blk, int len) 266 { 267 char *stop; 268 char *pt; 269 u_long chksm = BLNKSUM; /* initial value is checksum field sum */ 270 271 /* 272 * add the part of the block before the checksum field 273 */ 274 pt = blk; 275 stop = blk + CHK_OFFSET; 276 while (pt < stop) 277 chksm += (u_long)(*pt++ & 0xff); 278 /* 279 * move past the checksum field and keep going, spec counts the 280 * checksum field as the sum of 8 blanks (which is pre-computed as 281 * BLNKSUM). 282 * ASSUMED: len is greater than CHK_OFFSET. 
(len is where our 0 padding 283 * starts, no point in summing zero's) 284 */ 285 pt += CHK_LEN; 286 stop = blk + len; 287 while (pt < stop) 288 chksm += (u_long)(*pt++ & 0xff); 289 return(chksm); 290 } 291 292 /* 293 * Routines for old BSD style tar (also made portable to sysV tar) 294 */ 295 296 /* 297 * tar_id() 298 * determine if a block given to us is a valid tar header (and not a USTAR 299 * header). We have to be on the lookout for those pesky blocks of all 300 * zero's. 301 * Return: 302 * 0 if a tar header, -1 otherwise 303 */ 304 305 int 306 tar_id(char *blk, int size) 307 { 308 HD_TAR *hd; 309 HD_USTAR *uhd; 310 311 if (size < BLKMULT) 312 return(-1); 313 hd = (HD_TAR *)blk; 314 uhd = (HD_USTAR *)blk; 315 316 /* 317 * check for block of zero's first, a simple and fast test, then make 318 * sure this is not a ustar header by looking for the ustar magic 319 * cookie. We should use TMAGLEN, but some USTAR archive programs are 320 * wrong and create archives missing the \0. Last we check the 321 * checksum. If this is ok we have to assume it is a valid header. 
322 */ 323 if (hd->name[0] == '\0') 324 return(-1); 325 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) 326 return(-1); 327 if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT)) 328 return(-1); 329 force_one_volume = 1; 330 return(0); 331 } 332 333 /* 334 * tar_opt() 335 * handle tar format specific -o options 336 * Return: 337 * 0 if ok -1 otherwise 338 */ 339 340 int 341 tar_opt(void) 342 { 343 OPLIST *opt; 344 345 while ((opt = opt_next()) != NULL) { 346 if (strcmp(opt->name, TAR_OPTION) || 347 strcmp(opt->value, TAR_NODIR)) { 348 paxwarn(1, "Unknown tar format -o option/value pair %s=%s", 349 opt->name, opt->value); 350 paxwarn(1,"%s=%s is the only supported tar format option", 351 TAR_OPTION, TAR_NODIR); 352 return(-1); 353 } 354 355 /* 356 * we only support one option, and only when writing 357 */ 358 if ((act != APPND) && (act != ARCHIVE)) { 359 paxwarn(1, "%s=%s is only supported when writing.", 360 opt->name, opt->value); 361 return(-1); 362 } 363 tar_nodir = 1; 364 } 365 return(0); 366 } 367 368 369 /* 370 * tar_rd() 371 * extract the values out of block already determined to be a tar header. 372 * store the values in the ARCHD parameter. 
373 * Return: 374 * 0 375 */ 376 377 int 378 tar_rd(ARCHD *arcn, char *buf) 379 { 380 HD_TAR *hd; 381 u_quad_t val; 382 char *pt; 383 384 /* 385 * we only get proper sized buffers passed to us 386 */ 387 if (tar_id(buf, BLKMULT) < 0) 388 return(-1); 389 memset(arcn, 0, sizeof(*arcn)); 390 arcn->org_name = arcn->name; 391 arcn->sb.st_nlink = 1; 392 393 /* 394 * copy out the name and values in the stat buffer 395 */ 396 hd = (HD_TAR *)buf; 397 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { 398 arcn->nlen = expandname(arcn->name, sizeof(arcn->name), 399 &gnu_name_string, hd->name, sizeof(hd->name)); 400 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 401 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 402 } 403 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & 404 0xfff); 405 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 406 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 407 arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT); 408 val = asc_uqd(hd->mtime, sizeof(hd->mtime), OCT); 409 if ((time_t)val < 0 || (time_t)val != val) 410 arcn->sb.st_mtime = INT_MAX; /* XXX 2038 */ 411 else 412 arcn->sb.st_mtime = val; 413 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 414 415 /* 416 * have to look at the last character, it may be a '/' and that is used 417 * to encode this as a directory 418 */ 419 pt = &(arcn->name[arcn->nlen - 1]); 420 arcn->pad = 0; 421 arcn->skip = 0; 422 switch (hd->linkflag) { 423 case SYMTYPE: 424 /* 425 * symbolic link, need to get the link name and set the type in 426 * the st_mode so -v printing will look correct. 427 */ 428 arcn->type = PAX_SLK; 429 arcn->sb.st_mode |= S_IFLNK; 430 break; 431 case LNKTYPE: 432 /* 433 * hard link, need to get the link name, set the type in the 434 * st_mode and st_nlink so -v printing will look better. 
435 */ 436 arcn->type = PAX_HLK; 437 arcn->sb.st_nlink = 2; 438 439 /* 440 * no idea of what type this thing really points at, but 441 * we set something for printing only. 442 */ 443 arcn->sb.st_mode |= S_IFREG; 444 break; 445 case LONGLINKTYPE: 446 case LONGNAMETYPE: 447 /* 448 * GNU long link/file; we tag these here and let the 449 * pax internals deal with it -- too ugly otherwise. 450 */ 451 arcn->type = 452 hd->linkflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF; 453 arcn->pad = TAR_PAD(arcn->sb.st_size); 454 arcn->skip = arcn->sb.st_size; 455 break; 456 case DIRTYPE: 457 /* 458 * It is a directory, set the mode for -v printing 459 */ 460 arcn->type = PAX_DIR; 461 arcn->sb.st_mode |= S_IFDIR; 462 arcn->sb.st_nlink = 2; 463 break; 464 case AREGTYPE: 465 case REGTYPE: 466 default: 467 /* 468 * If we have a trailing / this is a directory and NOT a file. 469 */ 470 arcn->ln_name[0] = '\0'; 471 arcn->ln_nlen = 0; 472 if (*pt == '/') { 473 /* 474 * it is a directory, set the mode for -v printing 475 */ 476 arcn->type = PAX_DIR; 477 arcn->sb.st_mode |= S_IFDIR; 478 arcn->sb.st_nlink = 2; 479 } else { 480 /* 481 * have a file that will be followed by data. Set the 482 * skip value to the size field and calculate the size 483 * of the padding. 484 */ 485 arcn->type = PAX_REG; 486 arcn->sb.st_mode |= S_IFREG; 487 arcn->pad = TAR_PAD(arcn->sb.st_size); 488 arcn->skip = arcn->sb.st_size; 489 } 490 break; 491 } 492 493 /* 494 * strip off any trailing slash. 495 */ 496 if (*pt == '/') { 497 *pt = '\0'; 498 --arcn->nlen; 499 } 500 return(0); 501 } 502 503 /* 504 * tar_wr() 505 * write a tar header for the file specified in the ARCHD to the archive. 506 * Have to check for file types that cannot be stored and file names that 507 * are too long. Be careful of the term (last arg) to ul_oct, each field 508 * of tar has it own spec for the termination character(s). 
509 * ASSUMED: space after header in header block is zero filled 510 * Return: 511 * 0 if file has data to be written after the header, 1 if file has NO 512 * data to write after the header, -1 if archive write failed 513 */ 514 515 int 516 tar_wr(ARCHD *arcn) 517 { 518 HD_TAR *hd; 519 int len; 520 char hdblk[sizeof(HD_TAR)]; 521 522 /* 523 * check for those file system types which tar cannot store 524 */ 525 switch (arcn->type) { 526 case PAX_DIR: 527 /* 528 * user asked that dirs not be written to the archive 529 */ 530 if (tar_nodir) 531 return(1); 532 break; 533 case PAX_CHR: 534 paxwarn(1, "Tar cannot archive a character device %s", 535 arcn->org_name); 536 return(1); 537 case PAX_BLK: 538 paxwarn(1, "Tar cannot archive a block device %s", arcn->org_name); 539 return(1); 540 case PAX_SCK: 541 paxwarn(1, "Tar cannot archive a socket %s", arcn->org_name); 542 return(1); 543 case PAX_FIF: 544 paxwarn(1, "Tar cannot archive a fifo %s", arcn->org_name); 545 return(1); 546 case PAX_SLK: 547 case PAX_HLK: 548 case PAX_HRG: 549 if (arcn->ln_nlen > sizeof(hd->linkname)) { 550 paxwarn(1, "Link name too long for tar %s", 551 arcn->ln_name); 552 return(1); 553 } 554 break; 555 case PAX_REG: 556 case PAX_CTG: 557 default: 558 break; 559 } 560 561 /* 562 * check file name len, remember extra char for dirs (the / at the end) 563 */ 564 len = arcn->nlen; 565 if (arcn->type == PAX_DIR) 566 ++len; 567 if (len > sizeof(hd->name)) { 568 paxwarn(1, "File name too long for tar %s", arcn->name); 569 return(1); 570 } 571 572 /* 573 * Copy the data out of the ARCHD into the tar header based on the type 574 * of the file. Remember, many tar readers want all fields to be 575 * padded with zero so we zero the header first. We then set the 576 * linkflag field (type), the linkname, the size, and set the padding 577 * (if any) to be added after the file data (0 for all other types, 578 * as they only have a header). 
579 */ 580 memset(hdblk, 0, sizeof(hdblk)); 581 hd = (HD_TAR *)hdblk; 582 fieldcpy(hd->name, sizeof(hd->name), arcn->name, sizeof(arcn->name)); 583 arcn->pad = 0; 584 585 if (arcn->type == PAX_DIR) { 586 /* 587 * directories are the same as files, except have a filename 588 * that ends with a /, we add the slash here. No data follows 589 * dirs, so no pad. 590 */ 591 hd->linkflag = AREGTYPE; 592 hd->name[len-1] = '/'; 593 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 594 goto out; 595 } else if (arcn->type == PAX_SLK) { 596 /* 597 * no data follows this file, so no pad 598 */ 599 hd->linkflag = SYMTYPE; 600 fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name, 601 sizeof(arcn->ln_name)); 602 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 603 goto out; 604 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { 605 /* 606 * no data follows this file, so no pad 607 */ 608 hd->linkflag = LNKTYPE; 609 fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name, 610 sizeof(arcn->ln_name)); 611 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 612 goto out; 613 } else { 614 /* 615 * data follows this file, so set the pad 616 */ 617 hd->linkflag = AREGTYPE; 618 if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size, 619 sizeof(hd->size), 1)) { 620 paxwarn(1,"File is too large for tar %s", arcn->org_name); 621 return(1); 622 } 623 arcn->pad = TAR_PAD(arcn->sb.st_size); 624 } 625 626 /* 627 * copy those fields that are independent of the type 628 */ 629 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || 630 uqd_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->mtime, 631 sizeof(hd->mtime), 1) || 632 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || 633 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0)) 634 goto out; 635 636 /* 637 * calculate and add the checksum, then write the header. 
A return of 638 * 0 tells the caller to now write the file data, 1 says no data needs 639 * to be written 640 */ 641 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, 642 sizeof(hd->chksum), 3)) 643 goto out; 644 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) 645 return(-1); 646 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) 647 return(-1); 648 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 649 return(0); 650 return(1); 651 652 out: 653 /* 654 * header field is out of range 655 */ 656 paxwarn(1, "Tar header field is too small for %s", arcn->org_name); 657 return(1); 658 } 659 660 /* 661 * Routines for POSIX ustar 662 */ 663 664 /* 665 * ustar_strd() 666 * initialization for ustar read 667 * Return: 668 * 0 if ok, -1 otherwise 669 */ 670 671 int 672 ustar_strd(void) 673 { 674 if ((usrtb_start() < 0) || (grptb_start() < 0)) 675 return(-1); 676 return(0); 677 } 678 679 /* 680 * ustar_stwr() 681 * initialization for ustar write 682 * Return: 683 * 0 if ok, -1 otherwise 684 */ 685 686 int 687 ustar_stwr(void) 688 { 689 if ((uidtb_start() < 0) || (gidtb_start() < 0)) 690 return(-1); 691 return(0); 692 } 693 694 /* 695 * ustar_id() 696 * determine if a block given to us is a valid ustar header. We have to 697 * be on the lookout for those pesky blocks of all zero's 698 * Return: 699 * 0 if a ustar header, -1 otherwise 700 */ 701 702 int 703 ustar_id(char *blk, int size) 704 { 705 HD_USTAR *hd; 706 707 if (size < BLKMULT) 708 return(-1); 709 hd = (HD_USTAR *)blk; 710 711 /* 712 * check for block of zero's first, a simple and fast test then check 713 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive 714 * programs are fouled up and create archives missing the \0. Last we 715 * check the checksum. If ok we have to assume it is a valid header. 
716 */ 717 if (hd->prefix[0] == '\0' && hd->name[0] == '\0') 718 return(-1); 719 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) 720 return(-1); 721 if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT)) 722 return(-1); 723 return(0); 724 } 725 726 /* 727 * ustar_rd() 728 * extract the values out of block already determined to be a ustar header. 729 * store the values in the ARCHD parameter. 730 * Return: 731 * 0 732 */ 733 734 int 735 ustar_rd(ARCHD *arcn, char *buf) 736 { 737 HD_USTAR *hd; 738 char *dest; 739 int cnt = 0; 740 dev_t devmajor; 741 dev_t devminor; 742 u_quad_t val; 743 744 /* 745 * we only get proper sized buffers 746 */ 747 if (ustar_id(buf, BLKMULT) < 0) 748 return(-1); 749 memset(arcn, 0, sizeof(*arcn)); 750 arcn->org_name = arcn->name; 751 arcn->sb.st_nlink = 1; 752 hd = (HD_USTAR *)buf; 753 754 #ifndef SMALL 755 /* Process the Extended header. */ 756 if (hd->typeflag == XHDRTYPE || hd->typeflag == GHDRTYPE) { 757 if (rd_xheader(arcn, buf, 758 (off_t)asc_ul(hd->size, sizeof(hd->size), OCT), 759 hd->typeflag) < 0) 760 return (-1); 761 } 762 #endif 763 764 if (!arcn->nlen) { 765 /* 766 * See if the filename is split into two parts. if, so join 767 * the parts. We copy the prefix first and add a / between 768 * the prefix and name. 
769 */ 770 dest = arcn->name; 771 if (*(hd->prefix) != '\0') { 772 cnt = fieldcpy(dest, sizeof(arcn->name) - 1, 773 hd->prefix, sizeof(hd->prefix)); 774 dest += cnt; 775 *dest++ = '/'; 776 cnt++; 777 } else 778 cnt = 0; 779 780 if (hd->typeflag != LONGLINKTYPE && 781 hd->typeflag != LONGNAMETYPE) { 782 arcn->nlen = cnt + expandname(dest, 783 sizeof(arcn->name) - cnt, &gnu_name_string, 784 hd->name, sizeof(hd->name)); 785 } 786 } 787 788 if (!arcn->ln_nlen && 789 hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { 790 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 791 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 792 } 793 794 /* 795 * follow the spec to the letter. we should only have mode bits, strip 796 * off all other crud we may be passed. 797 */ 798 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & 799 0xfff); 800 arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT); 801 val = asc_uqd(hd->mtime, sizeof(hd->mtime), OCT); 802 if ((time_t)val < 0 || (time_t)val != val) 803 arcn->sb.st_mtime = INT_MAX; /* XXX 2038 */ 804 else 805 arcn->sb.st_mtime = val; 806 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 807 808 /* 809 * If we can find the ascii names for gname and uname in the password 810 * and group files we will use the uid's and gid they bind. Otherwise 811 * we use the uid and gid values stored in the header. (This is what 812 * the posix spec wants). 
813 */ 814 hd->gname[sizeof(hd->gname) - 1] = '\0'; 815 if (Nflag || gid_name(hd->gname, &(arcn->sb.st_gid)) < 0) 816 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 817 hd->uname[sizeof(hd->uname) - 1] = '\0'; 818 if (Nflag || uid_name(hd->uname, &(arcn->sb.st_uid)) < 0) 819 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 820 821 /* 822 * set the defaults, these may be changed depending on the file type 823 */ 824 arcn->pad = 0; 825 arcn->skip = 0; 826 arcn->sb.st_rdev = (dev_t)0; 827 828 /* 829 * set the mode and PAX type according to the typeflag in the header 830 */ 831 switch (hd->typeflag) { 832 case FIFOTYPE: 833 arcn->type = PAX_FIF; 834 arcn->sb.st_mode |= S_IFIFO; 835 break; 836 case DIRTYPE: 837 arcn->type = PAX_DIR; 838 arcn->sb.st_mode |= S_IFDIR; 839 arcn->sb.st_nlink = 2; 840 841 /* 842 * Some programs that create ustar archives append a '/' 843 * to the pathname for directories. This clearly violates 844 * ustar specs, but we will silently strip it off anyway. 845 */ 846 if (arcn->name[arcn->nlen - 1] == '/') 847 arcn->name[--arcn->nlen] = '\0'; 848 break; 849 case BLKTYPE: 850 case CHRTYPE: 851 /* 852 * this type requires the rdev field to be set. 
853 */ 854 if (hd->typeflag == BLKTYPE) { 855 arcn->type = PAX_BLK; 856 arcn->sb.st_mode |= S_IFBLK; 857 } else { 858 arcn->type = PAX_CHR; 859 arcn->sb.st_mode |= S_IFCHR; 860 } 861 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); 862 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); 863 arcn->sb.st_rdev = TODEV(devmajor, devminor); 864 break; 865 case SYMTYPE: 866 case LNKTYPE: 867 if (hd->typeflag == SYMTYPE) { 868 arcn->type = PAX_SLK; 869 arcn->sb.st_mode |= S_IFLNK; 870 } else { 871 arcn->type = PAX_HLK; 872 /* 873 * so printing looks better 874 */ 875 arcn->sb.st_mode |= S_IFREG; 876 arcn->sb.st_nlink = 2; 877 } 878 break; 879 case LONGLINKTYPE: 880 case LONGNAMETYPE: 881 /* 882 * GNU long link/file; we tag these here and let the 883 * pax internals deal with it -- too ugly otherwise. 884 */ 885 arcn->type = 886 hd->typeflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF; 887 arcn->pad = TAR_PAD(arcn->sb.st_size); 888 arcn->skip = arcn->sb.st_size; 889 break; 890 case CONTTYPE: 891 case AREGTYPE: 892 case REGTYPE: 893 default: 894 /* 895 * these types have file data that follows. Set the skip and 896 * pad fields. 897 */ 898 arcn->type = PAX_REG; 899 arcn->pad = TAR_PAD(arcn->sb.st_size); 900 arcn->skip = arcn->sb.st_size; 901 arcn->sb.st_mode |= S_IFREG; 902 break; 903 } 904 return(0); 905 } 906 907 /* 908 * ustar_wr() 909 * write a ustar header for the file specified in the ARCHD to the archive 910 * Have to check for file types that cannot be stored and file names that 911 * are too long. 
Be careful of the term (last arg) to ul_oct, we only use 912 * '\0' for the termination character (this is different than picky tar) 913 * ASSUMED: space after header in header block is zero filled 914 * Return: 915 * 0 if file has data to be written after the header, 1 if file has NO 916 * data to write after the header, -1 if archive write failed 917 */ 918 919 int 920 ustar_wr(ARCHD *arcn) 921 { 922 HD_USTAR *hd; 923 char *pt; 924 char hdblk[sizeof(HD_USTAR)]; 925 926 /* 927 * check for those file system types ustar cannot store 928 */ 929 if (arcn->type == PAX_SCK) { 930 paxwarn(1, "Ustar cannot archive a socket %s", arcn->org_name); 931 return(1); 932 } 933 934 /* 935 * user asked that dirs not be written to the archive 936 */ 937 if (arcn->type == PAX_DIR && tar_nodir) 938 return (1); 939 940 /* 941 * check the length of the linkname 942 */ 943 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 944 (arcn->type == PAX_HRG)) && (arcn->ln_nlen > sizeof(hd->linkname))){ 945 paxwarn(1, "Link name too long for ustar %s", arcn->ln_name); 946 return(1); 947 } 948 949 /* 950 * split the path name into prefix and name fields (if needed). if 951 * pt != arcn->name, the name has to be split 952 */ 953 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { 954 paxwarn(1, "File name too long for ustar %s", arcn->name); 955 return(1); 956 } 957 958 /* 959 * zero out the header so we don't have to worry about zero fill below 960 */ 961 memset(hdblk, 0, sizeof(hdblk)); 962 hd = (HD_USTAR *)hdblk; 963 arcn->pad = 0L; 964 965 /* 966 * split the name, or zero out the prefix 967 */ 968 if (pt != arcn->name) { 969 /* 970 * name was split, pt points at the / where the split is to 971 * occur, we remove the / and copy the first part to the prefix 972 */ 973 *pt = '\0'; 974 fieldcpy(hd->prefix, sizeof(hd->prefix), arcn->name, 975 sizeof(arcn->name)); 976 *pt++ = '/'; 977 } 978 979 /* 980 * copy the name part. 
this may be the whole path or the part after 981 * the prefix 982 */ 983 fieldcpy(hd->name, sizeof(hd->name), pt, 984 sizeof(arcn->name) - (pt - arcn->name)); 985 986 /* 987 * set the fields in the header that are type dependent 988 */ 989 switch (arcn->type) { 990 case PAX_DIR: 991 hd->typeflag = DIRTYPE; 992 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 993 goto out; 994 break; 995 case PAX_CHR: 996 case PAX_BLK: 997 if (arcn->type == PAX_CHR) 998 hd->typeflag = CHRTYPE; 999 else 1000 hd->typeflag = BLKTYPE; 1001 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor, 1002 sizeof(hd->devmajor), 3) || 1003 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor, 1004 sizeof(hd->devminor), 3) || 1005 ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1006 goto out; 1007 break; 1008 case PAX_FIF: 1009 hd->typeflag = FIFOTYPE; 1010 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1011 goto out; 1012 break; 1013 case PAX_SLK: 1014 case PAX_HLK: 1015 case PAX_HRG: 1016 if (arcn->type == PAX_SLK) 1017 hd->typeflag = SYMTYPE; 1018 else 1019 hd->typeflag = LNKTYPE; 1020 fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name, 1021 sizeof(arcn->ln_name)); 1022 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1023 goto out; 1024 break; 1025 case PAX_REG: 1026 case PAX_CTG: 1027 default: 1028 /* 1029 * file data with this type, set the padding 1030 */ 1031 if (arcn->type == PAX_CTG) 1032 hd->typeflag = CONTTYPE; 1033 else 1034 hd->typeflag = REGTYPE; 1035 arcn->pad = TAR_PAD(arcn->sb.st_size); 1036 if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size, 1037 sizeof(hd->size), 3)) { 1038 paxwarn(1,"File is too long for ustar %s",arcn->org_name); 1039 return(1); 1040 } 1041 break; 1042 } 1043 1044 strncpy(hd->magic, TMAGIC, TMAGLEN); 1045 strncpy(hd->version, TVERSION, TVERSLEN); 1046 1047 /* 1048 * set the remaining fields. Some versions want all 16 bits of mode 1049 * we better humor them (they really do not meet spec though).... 
1050 */ 1051 if (ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)) { 1052 if (uid_nobody == 0) { 1053 if (uid_name("nobody", &uid_nobody) == -1) 1054 goto out; 1055 } 1056 if (uid_warn != arcn->sb.st_uid) { 1057 uid_warn = arcn->sb.st_uid; 1058 paxwarn(1, 1059 "Ustar header field is too small for uid %lu, " 1060 "using nobody", (u_long)arcn->sb.st_uid); 1061 } 1062 if (ul_oct((u_long)uid_nobody, hd->uid, sizeof(hd->uid), 3)) 1063 goto out; 1064 } 1065 if (ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3)) { 1066 if (gid_nobody == 0) { 1067 if (gid_name("nobody", &gid_nobody) == -1) 1068 goto out; 1069 } 1070 if (gid_warn != arcn->sb.st_gid) { 1071 gid_warn = arcn->sb.st_gid; 1072 paxwarn(1, 1073 "Ustar header field is too small for gid %lu, " 1074 "using nobody", (u_long)arcn->sb.st_gid); 1075 } 1076 if (ul_oct((u_long)gid_nobody, hd->gid, sizeof(hd->gid), 3)) 1077 goto out; 1078 } 1079 if (uqd_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->mtime, 1080 sizeof(hd->mtime), 3) || 1081 ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3)) 1082 goto out; 1083 if (!Nflag) { 1084 strncpy(hd->uname, name_uid(arcn->sb.st_uid, 0), sizeof(hd->uname)); 1085 strncpy(hd->gname, name_gid(arcn->sb.st_gid, 0), sizeof(hd->gname)); 1086 } else { 1087 strncpy(hd->uname, "", sizeof(hd->uname)); 1088 strncpy(hd->gname, "", sizeof(hd->gname)); 1089 } 1090 1091 /* 1092 * calculate and store the checksum write the header to the archive 1093 * return 0 tells the caller to now write the file data, 1 says no data 1094 * needs to be written 1095 */ 1096 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, 1097 sizeof(hd->chksum), 3)) 1098 goto out; 1099 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) 1100 return(-1); 1101 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) 1102 return(-1); 1103 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 1104 return(0); 1105 return(1); 1106 1107 out: 1108 /* 1109 * header field is out of range 1110 */ 
1111 paxwarn(1, "Ustar header field is too small for %s", arcn->org_name); 1112 return(1); 1113 } 1114 1115 /* 1116 * name_split() 1117 * see if the name has to be split for storage in a ustar header. We try 1118 * to fit the entire name in the name field without splitting if we can. 1119 * The split point is always at a / 1120 * Return 1121 * character pointer to split point (always the / that is to be removed 1122 * if the split is not needed, the points is set to the start of the file 1123 * name (it would violate the spec to split there). A NULL is returned if 1124 * the file name is too long 1125 */ 1126 1127 static char * 1128 name_split(char *name, int len) 1129 { 1130 char *start; 1131 1132 /* 1133 * check to see if the file name is small enough to fit in the name 1134 * field. if so just return a pointer to the name. 1135 * The strings can fill the complete name and prefix fields 1136 * without a NUL terminator. 1137 */ 1138 if (len <= TNMSZ) 1139 return(name); 1140 if (len > (TPFSZ + TNMSZ + 1)) 1141 return(NULL); 1142 1143 /* 1144 * we start looking at the biggest sized piece that fits in the name 1145 * field. We walk forward looking for a slash to split at. The idea is 1146 * to find the biggest piece to fit in the name field (or the smallest 1147 * prefix we can find) (the -1 is correct the biggest piece would 1148 * include the slash between the two parts that gets thrown away) 1149 */ 1150 start = name + len - TNMSZ - 1; 1151 while ((*start != '\0') && (*start != '/')) 1152 ++start; 1153 1154 /* 1155 * if we hit the end of the string, this name cannot be split, so we 1156 * cannot store this file. 1157 */ 1158 if (*start == '\0') 1159 return(NULL); 1160 len = start - name; 1161 1162 /* 1163 * NOTE: /str where the length of str == TNMSZ can not be stored under 1164 * the p1003.1-1990 spec for ustar. We could force a prefix of / and 1165 * the file would then expand on extract to //str. 
The len == 0 below 1166 * makes this special case follow the spec to the letter. 1167 */ 1168 if ((len > TPFSZ) || (len == 0)) 1169 return(NULL); 1170 1171 /* 1172 * ok have a split point, return it to the caller 1173 */ 1174 return(start); 1175 } 1176 1177 static size_t 1178 expandname(char *buf, size_t len, char **gnu_name, const char *name, 1179 size_t limit) 1180 { 1181 size_t nlen; 1182 1183 if (*gnu_name) { 1184 /* *gnu_name is NUL terminated */ 1185 if ((nlen = strlcpy(buf, *gnu_name, len)) >= len) 1186 nlen = len - 1; 1187 free(*gnu_name); 1188 *gnu_name = NULL; 1189 } else 1190 nlen = fieldcpy(buf, len, name, limit); 1191 return(nlen); 1192 } 1193 1194 #ifndef SMALL 1195 1196 #define MINXHDRSZ 6 1197 1198 static int 1199 rd_xheader(ARCHD *arcn, char *buf, off_t size, char typeflag) 1200 { 1201 off_t len; 1202 char *delim, *keyword; 1203 char *nextp, *p; 1204 1205 if (size < MINXHDRSZ) { 1206 paxwarn(1, "Invalid extended header length"); 1207 return (-1); 1208 } 1209 if (rd_wrbuf(buf, size) != size) 1210 return (-1); 1211 if (rd_skip((off_t)BLKMULT - size) < 0) 1212 return (-1); 1213 1214 for (p = buf; size > 0; size -= len, p = nextp) { 1215 if (!isdigit((unsigned char)*p)) { 1216 paxwarn(1, "Invalid extended header record"); 1217 return (-1); 1218 } 1219 errno = 0; 1220 len = strtoll(p, &delim, 10); 1221 if (*delim != ' ' || (errno == ERANGE && 1222 (len == LLONG_MIN || len == LLONG_MAX)) || 1223 len < MINXHDRSZ) { 1224 paxwarn(1, "Invalid extended header record length"); 1225 return (-1); 1226 } 1227 if (len > size) { 1228 paxwarn(1, "Extended header record length %lld is " 1229 "out of range", (long long)len); 1230 return (-1); 1231 } 1232 nextp = p + len; 1233 keyword = p = delim + 1; 1234 p = memchr(p, '=', len); 1235 if (!p || nextp[-1] != '\n') { 1236 paxwarn(1, "Malformed extended header record"); 1237 return (-1); 1238 } 1239 *p++ = nextp[-1] = '\0'; 1240 if (typeflag == XHDRTYPE) { 1241 if (!strcmp(keyword, "path")) { 1242 arcn->nlen = 
strlcpy(arcn->name, p, 1243 sizeof(arcn->name)); 1244 } else if (!strcmp(keyword, "linkpath")) { 1245 arcn->ln_nlen = strlcpy(arcn->ln_name, p, 1246 sizeof(arcn->ln_name)); 1247 } 1248 } 1249 } 1250 1251 /* Update the ustar header. */ 1252 if (rd_wrbuf(buf, BLKMULT) != BLKMULT) 1253 return (-1); 1254 return (0); 1255 } 1256 #endif 1257