/*-
 * Copyright (c) 2003-2023 Tim Kientzle
 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
 * Copyright (c) 2016 Martin Matuska
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "archive_platform.h"

#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#include <stddef.h>
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif

#include "archive.h"
#include "archive_acl_private.h" /* For ACL parsing routines. */
#include "archive_entry.h"
#include "archive_entry_locale.h"
#include "archive_private.h"
#include "archive_read_private.h"

/* Smaller of two values.  Note: each argument is evaluated twice. */
#define tar_min(a,b) ((a) < (b) ? (a) : (b))

/*
 * Layout of POSIX 'ustar' tar header.
 *
 * This mirrors the on-disk header record byte for byte, so the order
 * and size of the fields must not change.  All fields are plain
 * character arrays; the numeric ones are text (or base-256) encoded.
 */
struct archive_entry_header_ustar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	char	typeflag[1];
	char	linkname[100];	/* "old format" header ends here */
	char	magic[6];	/* For POSIX: "ustar\0" */
	char	version[2];	/* For POSIX: "00" */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	prefix[155];
};

/*
 * Structure of GNU tar header
 */

/* One entry of the sparse map embedded in an old-style GNU header. */
struct gnu_sparse {
	char	offset[12];
	char	numbytes[12];
};

/*
 * GNU variant of the header record.  Shares its leading fields (through
 * 'linkname') with the ustar layout above; as with that struct, field
 * order and sizes mirror the on-disk record and must not change.
 */
struct archive_entry_header_gnutar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	char	typeflag[1];
	char	linkname[100];
	char	magic[8];  /* "ustar  \0" (note blank/blank/null at end) */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	atime[12];
	char	ctime[12];
	char	offset[12];
	char	longnames[4];
	char	unused[1];
	struct gnu_sparse sparse[4];
	char	isextended[1];
	char	realsize[12];
	/*
	 * Old GNU format doesn't use POSIX 'prefix' field; they use
	 * the 'L' (longname) entry instead.
	 */
};

/*
 * Data specific to this format.
 */

/*
 * One node of the singly-linked list describing the current entry's
 * body: 'offset' is where the block sits in the logical file,
 * 'remaining' is how many bytes of it have not been returned yet, and
 * 'hole' is non-zero for blocks that are skipped rather than returned
 * to the reader.
 */
struct sparse_block {
	struct sparse_block	*next;
	int64_t	offset;
	int64_t	remaining;
	int hole;
};

/* Per-archive reader state, stored in a->format->data. */
struct tar {
	/* Per-entry strings; emptied at the start of every entry. */
	struct archive_string	 entry_pathname;
	/* For "GNU.sparse.name" and other similar path extensions. */
	struct archive_string	 entry_pathname_override;
	struct archive_string	 entry_uname;
	struct archive_string	 entry_gname;
	struct archive_string	 entry_linkpath;
	struct archive_string	 line;
	int			 pax_hdrcharset_utf8;
	/* Body bytes of the current entry not yet handed to the caller. */
	int64_t			 entry_bytes_remaining;
	int64_t			 entry_offset;
	/* Padding after the body; consumed once the body is exhausted. */
	int64_t			 entry_padding;
	/* Bytes returned by the last read but not yet consumed from input. */
	int64_t			 entry_bytes_unconsumed;
	/* -1 means "unset"; reported as the final offset at end of entry. */
	int64_t			 realsize;
	/* Head of the sparse map for the current entry. */
	struct sparse_block	*sparse_list;
	/* Presumably the tail of sparse_list -- maintained outside this view. */
	struct sparse_block	*sparse_last;
	int64_t			 sparse_offset;
	int64_t			 sparse_numbytes;
	int			 sparse_gnu_major;
	int			 sparse_gnu_minor;
	char			 sparse_gnu_attributes_seen;
	char			 filetype;

	struct archive_string	 localname;
	/* Conversion selected with the "hdrcharset" option, if any. */
	struct archive_string_conv *opt_sconv;
	/* Conversion in effect for the current entry. */
	struct archive_string_conv *sconv;
	struct archive_string_conv *sconv_acl;
	/* Default conversion, created lazily on first use. */
	struct archive_string_conv *sconv_default;
	int			 init_default_conversion;
	/* Set by the "compat-2x" option. */
	int			 compat_2x;
	/* Set by the "mac-ext" option; on by default with HAVE_COPYFILE_H. */
	int			 process_mac_extensions;
	/* Set by the "read_concatenated_archives" option. */
	int			 read_concatenated_archives;
	int			 realsize_override;
};

/* Forward declarations of file-local (static) helpers. */
static int	archive_block_is_null(const char *p);
static char	*base64_decode(const char *, size_t, size_t *);
static int	gnu_add_sparse_entry(struct archive_read *, struct tar *,
		    int64_t offset, int64_t remaining);

static void	gnu_clear_sparse_list(struct tar *);
static int	gnu_sparse_old_read(struct archive_read *, struct tar *,
		    const struct archive_entry_header_gnutar *header, size_t *);
static int	gnu_sparse_old_parse(struct archive_read *, struct tar *,
		    const struct gnu_sparse *sparse, int length);
static int	gnu_sparse_01_parse(struct archive_read *, struct tar *,
		    const char *, size_t);
static ssize_t	gnu_sparse_10_read(struct archive_read *, struct tar *,
		    size_t *);
static int	header_Solaris_ACL(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *, size_t *);
static int	header_common(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_old_tar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_pax_extension(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *, size_t *);
static int	header_pax_global(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_gnu_longlink(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_gnu_longname(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	is_mac_metadata_entry(struct archive_entry *entry);
static int	read_mac_metadata_blob(struct archive_read *,
		    struct archive_entry *, size_t *);
static int	header_volume(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_ustar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_gnutar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	archive_read_format_tar_bid(struct archive_read *, int);
static int	archive_read_format_tar_options(struct archive_read *,
		    const char *, const char *);
static int	archive_read_format_tar_cleanup(struct archive_read *);
static int	archive_read_format_tar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	archive_read_format_tar_skip(struct archive_read *a);
static int	archive_read_format_tar_read_header(struct archive_read *,
		    struct archive_entry *);
static int	checksum(struct archive_read *, const void *);
static int	pax_attribute(struct archive_read *, struct tar *,
		    struct archive_entry *, const char *key, size_t key_length,
		    size_t value_length, size_t *unconsumed);
static int	pax_attribute_LIBARCHIVE_xattr(struct archive_entry *,
		    const char *, size_t, const char *, size_t);
/* Forward declarations of file-local (static) helpers. */
static int	pax_attribute_SCHILY_acl(struct archive_read *, struct tar *,
		    struct archive_entry *, size_t, int);
static int	pax_attribute_SUN_holesdata(struct archive_read *, struct tar *,
		    struct archive_entry *, const char *, size_t);
static void	pax_time(const char *, size_t, int64_t *sec, long *nanos);
static ssize_t	readline(struct archive_read *, struct tar *, const char **,
		    ssize_t limit, size_t *);
static int	read_body_to_string(struct archive_read *, struct tar *,
		    struct archive_string *, const void *h, size_t *);
static int	read_bytes_to_string(struct archive_read *,
		    struct archive_string *, size_t, size_t *);
static int64_t	tar_atol(const char *, size_t);
static int64_t	tar_atol10(const char *, size_t);
static int64_t	tar_atol256(const char *, size_t);
static int64_t	tar_atol8(const char *, size_t);
static int	tar_read_header(struct archive_read *, struct tar *,
		    struct archive_entry *, size_t *);
static int	tohex(int c);
static char	*url_decode(const char *, size_t);
static void	tar_flush_unconsumed(struct archive_read *, size_t *);

/* Sanity limits:  These numbers should be low enough to
 * prevent a maliciously-crafted archive from forcing us to
 * allocate extreme amounts of memory.  But of course, they
 * need to be high enough for any correct value.  These
 * will likely need some adjustment as we get more experience.
*/ 232 static const size_t guname_limit = 65536; /* Longest uname or gname: 64kiB */ 233 static const size_t pathname_limit = 1048576; /* Longest path name: 1MiB */ 234 static const size_t sparse_map_limit = 8 * 1048576; /* Longest sparse map: 8MiB */ 235 static const size_t xattr_limit = 16 * 1048576; /* Longest xattr: 16MiB */ 236 static const size_t fflags_limit = 512; /* Longest fflags */ 237 static const size_t acl_limit = 131072; /* Longest textual ACL: 128kiB */ 238 static const int64_t entry_limit = 0xfffffffffffffffLL; /* 2^60 bytes = 1 ExbiByte */ 239 240 int 241 archive_read_support_format_gnutar(struct archive *a) 242 { 243 archive_check_magic(a, ARCHIVE_READ_MAGIC, 244 ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar"); 245 return (archive_read_support_format_tar(a)); 246 } 247 248 249 int 250 archive_read_support_format_tar(struct archive *_a) 251 { 252 struct archive_read *a = (struct archive_read *)_a; 253 struct tar *tar; 254 int r; 255 256 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 257 ARCHIVE_STATE_NEW, "archive_read_support_format_tar"); 258 259 tar = calloc(1, sizeof(*tar)); 260 if (tar == NULL) { 261 archive_set_error(&a->archive, ENOMEM, 262 "Can't allocate tar data"); 263 return (ARCHIVE_FATAL); 264 } 265 #ifdef HAVE_COPYFILE_H 266 /* Set this by default on Mac OS. 
*/ 267 tar->process_mac_extensions = 1; 268 #endif 269 270 r = __archive_read_register_format(a, tar, "tar", 271 archive_read_format_tar_bid, 272 archive_read_format_tar_options, 273 archive_read_format_tar_read_header, 274 archive_read_format_tar_read_data, 275 archive_read_format_tar_skip, 276 NULL, 277 archive_read_format_tar_cleanup, 278 NULL, 279 NULL); 280 281 if (r != ARCHIVE_OK) 282 free(tar); 283 return (ARCHIVE_OK); 284 } 285 286 static int 287 archive_read_format_tar_cleanup(struct archive_read *a) 288 { 289 struct tar *tar; 290 291 tar = (struct tar *)(a->format->data); 292 gnu_clear_sparse_list(tar); 293 archive_string_free(&tar->entry_pathname); 294 archive_string_free(&tar->entry_pathname_override); 295 archive_string_free(&tar->entry_uname); 296 archive_string_free(&tar->entry_gname); 297 archive_string_free(&tar->entry_linkpath); 298 archive_string_free(&tar->line); 299 archive_string_free(&tar->localname); 300 free(tar); 301 (a->format->data) = NULL; 302 return (ARCHIVE_OK); 303 } 304 305 /* 306 * Validate number field 307 * 308 * This has to be pretty lenient in order to accommodate the enormous 309 * variety of tar writers in the world: 310 * = POSIX (IEEE Std 1003.1-1988) ustar requires octal values with leading 311 * zeros and allows fields to be terminated with space or null characters 312 * = Many writers use different termination (in particular, libarchive 313 * omits terminator bytes to squeeze one or two more digits) 314 * = Many writers pad with space and omit leading zeros 315 * = GNU tar and star write base-256 values if numbers are too 316 * big to be represented in octal 317 * 318 * Examples of specific tar headers that we should support: 319 * = Perl Archive::Tar terminates uid, gid, devminor and devmajor with two 320 * null bytes, pads size with spaces and other numeric fields with zeroes 321 * = plexus-archiver prior to 2.6.3 (before switching to commons-compress) 322 * may have uid and gid fields filled with spaces without any 
octal digits 323 * at all and pads all numeric fields with spaces 324 * 325 * This should tolerate all variants in use. It will reject a field 326 * where the writer just left garbage after a trailing NUL. 327 */ 328 static int 329 validate_number_field(const char* p_field, size_t i_size) 330 { 331 unsigned char marker = (unsigned char)p_field[0]; 332 if (marker == 128 || marker == 255 || marker == 0) { 333 /* Base-256 marker, there's nothing we can check. */ 334 return 1; 335 } else { 336 /* Must be octal */ 337 size_t i = 0; 338 /* Skip any leading spaces */ 339 while (i < i_size && p_field[i] == ' ') { 340 ++i; 341 } 342 /* Skip octal digits. */ 343 while (i < i_size && p_field[i] >= '0' && p_field[i] <= '7') { 344 ++i; 345 } 346 /* Any remaining characters must be space or NUL padding. */ 347 while (i < i_size) { 348 if (p_field[i] != ' ' && p_field[i] != 0) { 349 return 0; 350 } 351 ++i; 352 } 353 return 1; 354 } 355 } 356 357 static int 358 archive_read_format_tar_bid(struct archive_read *a, int best_bid) 359 { 360 int bid; 361 const char *h; 362 const struct archive_entry_header_ustar *header; 363 364 (void)best_bid; /* UNUSED */ 365 366 bid = 0; 367 368 /* Now let's look at the actual header and see if it matches. */ 369 h = __archive_read_ahead(a, 512, NULL); 370 if (h == NULL) 371 return (-1); 372 373 /* If it's an end-of-archive mark, we can handle it. */ 374 if (h[0] == 0 && archive_block_is_null(h)) { 375 /* 376 * Usually, I bid the number of bits verified, but 377 * in this case, 4096 seems excessive so I picked 10 as 378 * an arbitrary but reasonable-seeming value. 379 */ 380 return (10); 381 } 382 383 /* If it's not an end-of-archive mark, it must have a valid checksum.*/ 384 if (!checksum(a, h)) 385 return (0); 386 bid += 48; /* Checksum is usually 6 octal digits. */ 387 388 header = (const struct archive_entry_header_ustar *)h; 389 390 /* Recognize POSIX formats. 
*/ 391 if ((memcmp(header->magic, "ustar\0", 6) == 0) 392 && (memcmp(header->version, "00", 2) == 0)) 393 bid += 56; 394 395 /* Recognize GNU tar format. */ 396 if ((memcmp(header->magic, "ustar ", 6) == 0) 397 && (memcmp(header->version, " \0", 2) == 0)) 398 bid += 56; 399 400 /* Type flag must be null, digit or A-Z, a-z. */ 401 if (header->typeflag[0] != 0 && 402 !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') && 403 !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') && 404 !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') ) 405 return (0); 406 bid += 2; /* 6 bits of variation in an 8-bit field leaves 2 bits. */ 407 408 /* 409 * Check format of mode/uid/gid/mtime/size/rdevmajor/rdevminor fields. 410 */ 411 if (validate_number_field(header->mode, sizeof(header->mode)) == 0 412 || validate_number_field(header->uid, sizeof(header->uid)) == 0 413 || validate_number_field(header->gid, sizeof(header->gid)) == 0 414 || validate_number_field(header->mtime, sizeof(header->mtime)) == 0 415 || validate_number_field(header->size, sizeof(header->size)) == 0 416 || validate_number_field(header->rdevmajor, sizeof(header->rdevmajor)) == 0 417 || validate_number_field(header->rdevminor, sizeof(header->rdevminor)) == 0) { 418 bid = 0; 419 } 420 421 return (bid); 422 } 423 424 static int 425 archive_read_format_tar_options(struct archive_read *a, 426 const char *key, const char *val) 427 { 428 struct tar *tar; 429 int ret = ARCHIVE_FAILED; 430 431 tar = (struct tar *)(a->format->data); 432 if (strcmp(key, "compat-2x") == 0) { 433 /* Handle UTF-8 filenames as libarchive 2.x */ 434 tar->compat_2x = (val != NULL && val[0] != 0); 435 tar->init_default_conversion = tar->compat_2x; 436 return (ARCHIVE_OK); 437 } else if (strcmp(key, "hdrcharset") == 0) { 438 if (val == NULL || val[0] == 0) 439 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 440 "tar: hdrcharset option needs a character-set name"); 441 else { 442 tar->opt_sconv = 443 
archive_string_conversion_from_charset( 444 &a->archive, val, 0); 445 if (tar->opt_sconv != NULL) 446 ret = ARCHIVE_OK; 447 else 448 ret = ARCHIVE_FATAL; 449 } 450 return (ret); 451 } else if (strcmp(key, "mac-ext") == 0) { 452 tar->process_mac_extensions = (val != NULL && val[0] != 0); 453 return (ARCHIVE_OK); 454 } else if (strcmp(key, "read_concatenated_archives") == 0) { 455 tar->read_concatenated_archives = (val != NULL && val[0] != 0); 456 return (ARCHIVE_OK); 457 } 458 459 /* Note: The "warn" return is just to inform the options 460 * supervisor that we didn't handle it. It will generate 461 * a suitable error if no one used this option. */ 462 return (ARCHIVE_WARN); 463 } 464 465 /* utility function- this exists to centralize the logic of tracking 466 * how much unconsumed data we have floating around, and to consume 467 * anything outstanding since we're going to do read_aheads 468 */ 469 static void 470 tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed) 471 { 472 if (*unconsumed) { 473 /* 474 void *data = (void *)__archive_read_ahead(a, *unconsumed, NULL); 475 * this block of code is to poison claimed unconsumed space, ensuring 476 * things break if it is in use still. 477 * currently it WILL break things, so enable it only for debugging this issue 478 if (data) { 479 memset(data, 0xff, *unconsumed); 480 } 481 */ 482 __archive_read_consume(a, *unconsumed); 483 *unconsumed = 0; 484 } 485 } 486 487 /* 488 * The function invoked by archive_read_next_header(). This 489 * just sets up a few things and then calls the internal 490 * tar_read_header() function below. 491 */ 492 static int 493 archive_read_format_tar_read_header(struct archive_read *a, 494 struct archive_entry *entry) 495 { 496 /* 497 * When converting tar archives to cpio archives, it is 498 * essential that each distinct file have a distinct inode 499 * number. To simplify this, we keep a static count here to 500 * assign fake dev/inode numbers to each tar entry. 
Note that 501 * pax format archives may overwrite this with something more 502 * useful. 503 * 504 * Ideally, we would track every file read from the archive so 505 * that we could assign the same dev/ino pair to hardlinks, 506 * but the memory required to store a complete lookup table is 507 * probably not worthwhile just to support the relatively 508 * obscure tar->cpio conversion case. 509 */ 510 /* TODO: Move this into `struct tar` to avoid conflicts 511 * when reading multiple archives */ 512 static int default_inode; 513 static int default_dev; 514 struct tar *tar; 515 const char *p; 516 const wchar_t *wp; 517 int r; 518 size_t l, unconsumed = 0; 519 520 /* Assign default device/inode values. */ 521 archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */ 522 archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */ 523 /* Limit generated st_ino number to 16 bits. */ 524 if (default_inode >= 0xffff) { 525 ++default_dev; 526 default_inode = 0; 527 } 528 529 tar = (struct tar *)(a->format->data); 530 tar->entry_offset = 0; 531 gnu_clear_sparse_list(tar); 532 tar->realsize = -1; /* Mark this as "unset" */ 533 tar->realsize_override = 0; 534 535 /* Setup default string conversion. */ 536 tar->sconv = tar->opt_sconv; 537 if (tar->sconv == NULL) { 538 if (!tar->init_default_conversion) { 539 tar->sconv_default = 540 archive_string_default_conversion_for_read(&(a->archive)); 541 tar->init_default_conversion = 1; 542 } 543 tar->sconv = tar->sconv_default; 544 } 545 546 r = tar_read_header(a, tar, entry, &unconsumed); 547 548 tar_flush_unconsumed(a, &unconsumed); 549 550 /* 551 * "non-sparse" files are really just sparse files with 552 * a single block. 
553 */ 554 if (tar->sparse_list == NULL) { 555 if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining) 556 != ARCHIVE_OK) 557 return (ARCHIVE_FATAL); 558 } else { 559 struct sparse_block *sb; 560 561 for (sb = tar->sparse_list; sb != NULL; sb = sb->next) { 562 if (!sb->hole) 563 archive_entry_sparse_add_entry(entry, 564 sb->offset, sb->remaining); 565 } 566 } 567 568 if (r == ARCHIVE_OK && archive_entry_filetype(entry) == AE_IFREG) { 569 /* 570 * "Regular" entry with trailing '/' is really 571 * directory: This is needed for certain old tar 572 * variants and even for some broken newer ones. 573 */ 574 if ((wp = archive_entry_pathname_w(entry)) != NULL) { 575 l = wcslen(wp); 576 if (l > 0 && wp[l - 1] == L'/') { 577 archive_entry_set_filetype(entry, AE_IFDIR); 578 tar->entry_bytes_remaining = 0; 579 tar->entry_padding = 0; 580 } 581 } else if ((p = archive_entry_pathname(entry)) != NULL) { 582 l = strlen(p); 583 if (l > 0 && p[l - 1] == '/') { 584 archive_entry_set_filetype(entry, AE_IFDIR); 585 tar->entry_bytes_remaining = 0; 586 tar->entry_padding = 0; 587 } 588 } 589 } 590 return (r); 591 } 592 593 static int 594 archive_read_format_tar_read_data(struct archive_read *a, 595 const void **buff, size_t *size, int64_t *offset) 596 { 597 ssize_t bytes_read; 598 struct tar *tar; 599 struct sparse_block *p; 600 601 tar = (struct tar *)(a->format->data); 602 603 for (;;) { 604 /* Remove exhausted entries from sparse list. */ 605 while (tar->sparse_list != NULL && 606 tar->sparse_list->remaining == 0) { 607 p = tar->sparse_list; 608 tar->sparse_list = p->next; 609 free(p); 610 } 611 612 if (tar->entry_bytes_unconsumed) { 613 __archive_read_consume(a, tar->entry_bytes_unconsumed); 614 tar->entry_bytes_unconsumed = 0; 615 } 616 617 /* If we're at end of file, return EOF. 
*/ 618 if (tar->sparse_list == NULL || 619 tar->entry_bytes_remaining == 0) { 620 if (__archive_read_consume(a, tar->entry_padding) < 0) 621 return (ARCHIVE_FATAL); 622 tar->entry_padding = 0; 623 *buff = NULL; 624 *size = 0; 625 *offset = tar->realsize; 626 return (ARCHIVE_EOF); 627 } 628 629 *buff = __archive_read_ahead(a, 1, &bytes_read); 630 if (*buff == NULL) { 631 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 632 "Truncated tar archive" 633 " detected while reading data"); 634 return (ARCHIVE_FATAL); 635 } 636 if (bytes_read > tar->entry_bytes_remaining) 637 bytes_read = (ssize_t)tar->entry_bytes_remaining; 638 /* Don't read more than is available in the 639 * current sparse block. */ 640 if (tar->sparse_list->remaining < bytes_read) 641 bytes_read = (ssize_t)tar->sparse_list->remaining; 642 *size = bytes_read; 643 *offset = tar->sparse_list->offset; 644 tar->sparse_list->remaining -= bytes_read; 645 tar->sparse_list->offset += bytes_read; 646 tar->entry_bytes_remaining -= bytes_read; 647 tar->entry_bytes_unconsumed = bytes_read; 648 649 if (!tar->sparse_list->hole) 650 return (ARCHIVE_OK); 651 /* Current is hole data and skip this. */ 652 } 653 } 654 655 static int 656 archive_read_format_tar_skip(struct archive_read *a) 657 { 658 int64_t bytes_skipped; 659 int64_t request; 660 struct sparse_block *p; 661 struct tar* tar; 662 663 tar = (struct tar *)(a->format->data); 664 665 /* Do not consume the hole of a sparse file. 
*/ 666 request = 0; 667 for (p = tar->sparse_list; p != NULL; p = p->next) { 668 if (!p->hole) { 669 if (p->remaining >= INT64_MAX - request) { 670 return ARCHIVE_FATAL; 671 } 672 request += p->remaining; 673 } 674 } 675 if (request > tar->entry_bytes_remaining) 676 request = tar->entry_bytes_remaining; 677 request += tar->entry_padding + tar->entry_bytes_unconsumed; 678 679 bytes_skipped = __archive_read_consume(a, request); 680 if (bytes_skipped < 0) 681 return (ARCHIVE_FATAL); 682 683 tar->entry_bytes_remaining = 0; 684 tar->entry_bytes_unconsumed = 0; 685 tar->entry_padding = 0; 686 687 /* Free the sparse list. */ 688 gnu_clear_sparse_list(tar); 689 690 return (ARCHIVE_OK); 691 } 692 693 /* 694 * This function reads and interprets all of the headers associated 695 * with a single entry. 696 */ 697 static int 698 tar_read_header(struct archive_read *a, struct tar *tar, 699 struct archive_entry *entry, size_t *unconsumed) 700 { 701 ssize_t bytes; 702 int err = ARCHIVE_OK, err2; 703 int eof_fatal = 0; /* EOF is okay at some points... */ 704 const char *h; 705 const struct archive_entry_header_ustar *header; 706 const struct archive_entry_header_gnutar *gnuheader; 707 708 /* Bitmask of what header types we've seen. */ 709 int32_t seen_headers = 0; 710 static const int32_t seen_A_header = 1; 711 static const int32_t seen_g_header = 2; 712 static const int32_t seen_K_header = 4; 713 static const int32_t seen_L_header = 8; 714 static const int32_t seen_V_header = 16; 715 static const int32_t seen_x_header = 32; /* Also X */ 716 static const int32_t seen_mac_metadata = 512; 717 718 tar->pax_hdrcharset_utf8 = 1; 719 tar->sparse_gnu_attributes_seen = 0; 720 archive_string_empty(&(tar->entry_gname)); 721 archive_string_empty(&(tar->entry_pathname)); 722 archive_string_empty(&(tar->entry_pathname_override)); 723 archive_string_empty(&(tar->entry_uname)); 724 archive_string_empty(&tar->entry_linkpath); 725 726 /* Ensure format is set. 
*/ 727 if (a->archive.archive_format_name == NULL) { 728 a->archive.archive_format = ARCHIVE_FORMAT_TAR; 729 a->archive.archive_format_name = "tar"; 730 } 731 732 /* 733 * TODO: Write global/default pax options into 734 * 'entry' struct here before overwriting with 735 * file-specific options. 736 */ 737 738 /* Loop over all the headers needed for the next entry */ 739 for (;;) { 740 741 /* Find the next valid header record. */ 742 while (1) { 743 tar_flush_unconsumed(a, unconsumed); 744 745 /* Read 512-byte header record */ 746 h = __archive_read_ahead(a, 512, &bytes); 747 if (bytes == 0) { /* EOF at a block boundary. */ 748 if (eof_fatal) { 749 /* We've read a special header already; 750 * if there's no regular header, then this is 751 * a premature EOF. */ 752 archive_set_error(&a->archive, EINVAL, 753 "Damaged tar archive"); 754 return (ARCHIVE_FATAL); 755 } else { 756 return (ARCHIVE_EOF); 757 } 758 } 759 if (h == NULL) { /* Short block at EOF; this is bad. */ 760 archive_set_error(&a->archive, 761 ARCHIVE_ERRNO_FILE_FORMAT, 762 "Truncated tar archive" 763 " detected while reading next heaader"); 764 return (ARCHIVE_FATAL); 765 } 766 *unconsumed += 512; 767 768 if (h[0] == 0 && archive_block_is_null(h)) { 769 /* We found a NULL block which indicates end-of-archive */ 770 771 if (tar->read_concatenated_archives) { 772 /* We're ignoring NULL blocks, so keep going. */ 773 continue; 774 } 775 776 /* Try to consume a second all-null record, as well. */ 777 /* If we can't, that's okay. */ 778 tar_flush_unconsumed(a, unconsumed); 779 h = __archive_read_ahead(a, 512, NULL); 780 if (h != NULL && h[0] == 0 && archive_block_is_null(h)) 781 __archive_read_consume(a, 512); 782 783 archive_clear_error(&a->archive); 784 return (ARCHIVE_EOF); 785 } 786 787 /* This is NOT a null block, so it must be a valid header. 
*/ 788 if (!checksum(a, h)) { 789 tar_flush_unconsumed(a, unconsumed); 790 archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); 791 /* If we've read some critical information (pax headers, etc) 792 * and _then_ see a bad header, we can't really recover. */ 793 if (eof_fatal) { 794 return (ARCHIVE_FATAL); 795 } else { 796 return (ARCHIVE_RETRY); 797 } 798 } 799 break; 800 } 801 802 /* Determine the format variant. */ 803 header = (const struct archive_entry_header_ustar *)h; 804 switch(header->typeflag[0]) { 805 case 'A': /* Solaris tar ACL */ 806 if (seen_headers & seen_A_header) { 807 return (ARCHIVE_FATAL); 808 } 809 seen_headers |= seen_A_header; 810 a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 811 a->archive.archive_format_name = "Solaris tar"; 812 err2 = header_Solaris_ACL(a, tar, entry, h, unconsumed); 813 break; 814 case 'g': /* POSIX-standard 'g' header. */ 815 if (seen_headers & seen_g_header) { 816 return (ARCHIVE_FATAL); 817 } 818 seen_headers |= seen_g_header; 819 a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 820 a->archive.archive_format_name = "POSIX pax interchange format"; 821 err2 = header_pax_global(a, tar, entry, h, unconsumed); 822 break; 823 case 'K': /* Long link name (GNU tar, others) */ 824 if (seen_headers & seen_K_header) { 825 return (ARCHIVE_FATAL); 826 } 827 seen_headers |= seen_K_header; 828 err2 = header_gnu_longlink(a, tar, entry, h, unconsumed); 829 break; 830 case 'L': /* Long filename (GNU tar, others) */ 831 if (seen_headers & seen_L_header) { 832 return (ARCHIVE_FATAL); 833 } 834 seen_headers |= seen_L_header; 835 err2 = header_gnu_longname(a, tar, entry, h, unconsumed); 836 break; 837 case 'V': /* GNU volume header */ 838 if (seen_headers & seen_V_header) { 839 return (ARCHIVE_FATAL); 840 } 841 seen_headers |= seen_V_header; 842 err2 = header_volume(a, tar, entry, h, unconsumed); 843 break; 844 case 'X': /* Used by SUN tar; same as 'x'. 
*/ 845 if (seen_headers & seen_x_header) { 846 return (ARCHIVE_FATAL); 847 } 848 seen_headers |= seen_x_header; 849 a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 850 a->archive.archive_format_name = 851 "POSIX pax interchange format (Sun variant)"; 852 err2 = header_pax_extension(a, tar, entry, h, unconsumed); 853 break; 854 case 'x': /* POSIX-standard 'x' header. */ 855 if (seen_headers & seen_x_header) { 856 return (ARCHIVE_FATAL); 857 } 858 seen_headers |= seen_x_header; 859 a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 860 a->archive.archive_format_name = "POSIX pax interchange format"; 861 err2 = header_pax_extension(a, tar, entry, h, unconsumed); 862 break; 863 default: /* Regular header: Legacy tar, GNU tar, or ustar */ 864 gnuheader = (const struct archive_entry_header_gnutar *)h; 865 if (memcmp(gnuheader->magic, "ustar \0", 8) == 0) { 866 a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR; 867 a->archive.archive_format_name = "GNU tar format"; 868 err2 = header_gnutar(a, tar, entry, h, unconsumed); 869 } else if (memcmp(header->magic, "ustar", 5) == 0) { 870 if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { 871 a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR; 872 a->archive.archive_format_name = "POSIX ustar format"; 873 } 874 err2 = header_ustar(a, tar, entry, h); 875 } else { 876 a->archive.archive_format = ARCHIVE_FORMAT_TAR; 877 a->archive.archive_format_name = "tar (non-POSIX)"; 878 err2 = header_old_tar(a, tar, entry, h); 879 } 880 err = err_combine(err, err2); 881 /* We return warnings or success as-is. Anything else is fatal. */ 882 if (err < ARCHIVE_WARN) { 883 return (ARCHIVE_FATAL); 884 } 885 /* Filename of the form `._filename` is an AppleDouble 886 * extension entry. The body is the macOS metadata blob; 887 * this is followed by another entry with the actual 888 * regular file data. 
889 * This design has two drawbacks: 890 * = it's brittle; you might just have a file with such a name 891 * = it duplicates any long pathname extensions 892 * 893 * TODO: This probably shouldn't be here at all. Consider 894 * just returning the contents as a regular entry here and 895 * then dealing with it when we write data to disk. 896 */ 897 if (tar->process_mac_extensions 898 && ((seen_headers & seen_mac_metadata) == 0) 899 && is_mac_metadata_entry(entry)) { 900 err2 = read_mac_metadata_blob(a, entry, unconsumed); 901 if (err2 < ARCHIVE_WARN) { 902 return (ARCHIVE_FATAL); 903 } 904 err = err_combine(err, err2); 905 /* Note: Other headers can appear again. */ 906 seen_headers = seen_mac_metadata; 907 break; 908 } 909 910 /* Reconcile GNU sparse attributes */ 911 if (tar->sparse_gnu_attributes_seen) { 912 /* Only 'S' (GNU sparse) and ustar '0' regular files can be sparse */ 913 if (tar->filetype != 'S' && tar->filetype != '0') { 914 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 915 "Non-regular file cannot be sparse"); 916 return (ARCHIVE_WARN); 917 } else if (tar->sparse_gnu_major == 0 && 918 tar->sparse_gnu_minor == 0) { 919 /* Sparse map already parsed from 'x' header */ 920 } else if (tar->sparse_gnu_major == 0 && 921 tar->sparse_gnu_minor == 1) { 922 /* Sparse map already parsed from 'x' header */ 923 } else if (tar->sparse_gnu_major == 1 && 924 tar->sparse_gnu_minor == 0) { 925 /* Sparse map is prepended to file contents */ 926 ssize_t bytes_read; 927 bytes_read = gnu_sparse_10_read(a, tar, unconsumed); 928 if (bytes_read < 0) 929 return ((int)bytes_read); 930 tar->entry_bytes_remaining -= bytes_read; 931 } else { 932 archive_set_error(&a->archive, 933 ARCHIVE_ERRNO_MISC, 934 "Unrecognized GNU sparse file format"); 935 return (ARCHIVE_WARN); 936 } 937 } 938 return (err); 939 } 940 941 /* We're between headers ... 
*/ 942 err = err_combine(err, err2); 943 if (err == ARCHIVE_FATAL) 944 return (err); 945 946 /* The GNU volume header and the pax `g` global header 947 * are both allowed to be the only header in an 948 * archive. If we've seen any other header, a 949 * following EOF is fatal. */ 950 if ((seen_headers & ~seen_V_header & ~seen_g_header) != 0) { 951 eof_fatal = 1; 952 } 953 } 954 } 955 956 /* 957 * Return true if block checksum is correct. 958 */ 959 static int 960 checksum(struct archive_read *a, const void *h) 961 { 962 const unsigned char *bytes; 963 const struct archive_entry_header_ustar *header; 964 int check, sum; 965 size_t i; 966 967 (void)a; /* UNUSED */ 968 bytes = (const unsigned char *)h; 969 header = (const struct archive_entry_header_ustar *)h; 970 971 /* Checksum field must hold an octal number */ 972 for (i = 0; i < sizeof(header->checksum); ++i) { 973 char c = header->checksum[i]; 974 if (c != ' ' && c != '\0' && (c < '0' || c > '7')) 975 return 0; 976 } 977 978 /* 979 * Test the checksum. Note that POSIX specifies _unsigned_ 980 * bytes for this calculation. 981 */ 982 sum = (int)tar_atol(header->checksum, sizeof(header->checksum)); 983 check = 0; 984 for (i = 0; i < 148; i++) 985 check += (unsigned char)bytes[i]; 986 for (; i < 156; i++) 987 check += 32; 988 for (; i < 512; i++) 989 check += (unsigned char)bytes[i]; 990 if (sum == check) 991 return (1); 992 993 /* 994 * Repeat test with _signed_ bytes, just in case this archive 995 * was created by an old BSD, Solaris, or HP-UX tar with a 996 * broken checksum calculation. 997 */ 998 check = 0; 999 for (i = 0; i < 148; i++) 1000 check += (signed char)bytes[i]; 1001 for (; i < 156; i++) 1002 check += 32; 1003 for (; i < 512; i++) 1004 check += (signed char)bytes[i]; 1005 if (sum == check) 1006 return (1); 1007 1008 #if DONT_FAIL_ON_CRC_ERROR 1009 /* Speed up fuzzing by pretending the checksum is always right. 
*/ 1010 return (1); 1011 #else 1012 return (0); 1013 #endif 1014 } 1015 1016 /* 1017 * Return true if this block contains only nulls. 1018 */ 1019 static int 1020 archive_block_is_null(const char *p) 1021 { 1022 unsigned i; 1023 1024 for (i = 0; i < 512; i++) 1025 if (*p++) 1026 return (0); 1027 return (1); 1028 } 1029 1030 /* 1031 * Interpret 'A' Solaris ACL header 1032 */ 1033 static int 1034 header_Solaris_ACL(struct archive_read *a, struct tar *tar, 1035 struct archive_entry *entry, const void *h, size_t *unconsumed) 1036 { 1037 const struct archive_entry_header_ustar *header; 1038 struct archive_string acl_text; 1039 size_t size; 1040 int err, acl_type; 1041 int64_t type; 1042 char *acl, *p; 1043 1044 header = (const struct archive_entry_header_ustar *)h; 1045 size = (size_t)tar_atol(header->size, sizeof(header->size)); 1046 archive_string_init(&acl_text); 1047 err = read_body_to_string(a, tar, &acl_text, h, unconsumed); 1048 if (err != ARCHIVE_OK) { 1049 archive_string_free(&acl_text); 1050 return (err); 1051 } 1052 1053 /* TODO: Examine the first characters to see if this 1054 * is an AIX ACL descriptor. We'll likely never support 1055 * them, but it would be polite to recognize and warn when 1056 * we do see them. */ 1057 1058 /* Leading octal number indicates ACL type and number of entries. 
*/ 1059 p = acl = acl_text.s; 1060 type = 0; 1061 while (*p != '\0' && p < acl + size) { 1062 if (*p < '0' || *p > '7') { 1063 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1064 "Malformed Solaris ACL attribute (invalid digit)"); 1065 archive_string_free(&acl_text); 1066 return(ARCHIVE_WARN); 1067 } 1068 type <<= 3; 1069 type += *p - '0'; 1070 if (type > 077777777) { 1071 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1072 "Malformed Solaris ACL attribute (count too large)"); 1073 archive_string_free(&acl_text); 1074 return (ARCHIVE_WARN); 1075 } 1076 p++; 1077 } 1078 switch ((int)type & ~0777777) { 1079 case 01000000: 1080 /* POSIX.1e ACL */ 1081 acl_type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; 1082 break; 1083 case 03000000: 1084 /* NFSv4 ACL */ 1085 acl_type = ARCHIVE_ENTRY_ACL_TYPE_NFS4; 1086 break; 1087 default: 1088 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1089 "Malformed Solaris ACL attribute (unsupported type %o)", 1090 (int)type); 1091 archive_string_free(&acl_text); 1092 return (ARCHIVE_WARN); 1093 } 1094 p++; 1095 1096 if (p >= acl + size) { 1097 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1098 "Malformed Solaris ACL attribute (body overflow)"); 1099 archive_string_free(&acl_text); 1100 return(ARCHIVE_WARN); 1101 } 1102 1103 /* ACL text is null-terminated; find the end. 
*/ 1104 size -= (p - acl); 1105 acl = p; 1106 1107 while (*p != '\0' && p < acl + size) 1108 p++; 1109 1110 if (tar->sconv_acl == NULL) { 1111 tar->sconv_acl = archive_string_conversion_from_charset( 1112 &(a->archive), "UTF-8", 1); 1113 if (tar->sconv_acl == NULL) { 1114 archive_string_free(&acl_text); 1115 return (ARCHIVE_FATAL); 1116 } 1117 } 1118 archive_strncpy(&(tar->localname), acl, p - acl); 1119 err = archive_acl_from_text_l(archive_entry_acl(entry), 1120 tar->localname.s, acl_type, tar->sconv_acl); 1121 /* Workaround: Force perm_is_set() to be correct */ 1122 /* If this bit were stored in the ACL, this wouldn't be needed */ 1123 archive_entry_set_perm(entry, archive_entry_perm(entry)); 1124 if (err != ARCHIVE_OK) { 1125 if (errno == ENOMEM) { 1126 archive_set_error(&a->archive, ENOMEM, 1127 "Can't allocate memory for ACL"); 1128 } else 1129 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1130 "Malformed Solaris ACL attribute (unparsable)"); 1131 } 1132 archive_string_free(&acl_text); 1133 return (err); 1134 } 1135 1136 /* 1137 * Interpret 'K' long linkname header. 
1138 */ 1139 static int 1140 header_gnu_longlink(struct archive_read *a, struct tar *tar, 1141 struct archive_entry *entry, const void *h, size_t *unconsumed) 1142 { 1143 int err; 1144 1145 struct archive_string linkpath; 1146 archive_string_init(&linkpath); 1147 err = read_body_to_string(a, tar, &linkpath, h, unconsumed); 1148 archive_entry_set_link(entry, linkpath.s); 1149 archive_string_free(&linkpath); 1150 return (err); 1151 } 1152 1153 static int 1154 set_conversion_failed_error(struct archive_read *a, 1155 struct archive_string_conv *sconv, const char *name) 1156 { 1157 if (errno == ENOMEM) { 1158 archive_set_error(&a->archive, ENOMEM, 1159 "Can't allocate memory for %s", name); 1160 return (ARCHIVE_FATAL); 1161 } 1162 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1163 "%s can't be converted from %s to current locale.", 1164 name, archive_string_conversion_charset_name(sconv)); 1165 return (ARCHIVE_WARN); 1166 } 1167 1168 /* 1169 * Interpret 'L' long filename header. 1170 */ 1171 static int 1172 header_gnu_longname(struct archive_read *a, struct tar *tar, 1173 struct archive_entry *entry, const void *h, size_t *unconsumed) 1174 { 1175 int err; 1176 struct archive_string longname; 1177 1178 archive_string_init(&longname); 1179 err = read_body_to_string(a, tar, &longname, h, unconsumed); 1180 if (err == ARCHIVE_OK) { 1181 if (archive_entry_copy_pathname_l(entry, longname.s, 1182 archive_strlen(&longname), tar->sconv) != 0) 1183 err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 1184 } 1185 archive_string_free(&longname); 1186 return (err); 1187 } 1188 1189 /* 1190 * Interpret 'V' GNU tar volume header. 
1191 */ 1192 static int 1193 header_volume(struct archive_read *a, struct tar *tar, 1194 struct archive_entry *entry, const void *h, size_t *unconsumed) 1195 { 1196 const struct archive_entry_header_ustar *header; 1197 int64_t size, to_consume; 1198 1199 (void)a; /* UNUSED */ 1200 (void)tar; /* UNUSED */ 1201 (void)entry; /* UNUSED */ 1202 1203 header = (const struct archive_entry_header_ustar *)h; 1204 size = tar_atol(header->size, sizeof(header->size)); 1205 if (size > (int64_t)pathname_limit) { 1206 return (ARCHIVE_FATAL); 1207 } 1208 to_consume = ((size + 511) & ~511); 1209 *unconsumed += to_consume; 1210 return (ARCHIVE_OK); 1211 } 1212 1213 /* 1214 * Read the next `size` bytes into the provided string. 1215 * Null-terminate the string. 1216 */ 1217 static int 1218 read_bytes_to_string(struct archive_read *a, 1219 struct archive_string *as, size_t size, 1220 size_t *unconsumed) { 1221 const void *src; 1222 1223 /* Fail if we can't make our buffer big enough. */ 1224 if (archive_string_ensure(as, (size_t)size+1) == NULL) { 1225 archive_set_error(&a->archive, ENOMEM, 1226 "No memory"); 1227 return (ARCHIVE_FATAL); 1228 } 1229 1230 tar_flush_unconsumed(a, unconsumed); 1231 1232 /* Read the body into the string. */ 1233 src = __archive_read_ahead(a, size, NULL); 1234 if (src == NULL) { 1235 archive_set_error(&a->archive, EINVAL, 1236 "Truncated archive" 1237 " detected while reading metadata"); 1238 *unconsumed = 0; 1239 return (ARCHIVE_FATAL); 1240 } 1241 memcpy(as->s, src, (size_t)size); 1242 as->s[size] = '\0'; 1243 as->length = (size_t)size; 1244 *unconsumed += size; 1245 return (ARCHIVE_OK); 1246 } 1247 1248 /* 1249 * Read body of an archive entry into an archive_string object. 
1250 */ 1251 static int 1252 read_body_to_string(struct archive_read *a, struct tar *tar, 1253 struct archive_string *as, const void *h, size_t *unconsumed) 1254 { 1255 int64_t size; 1256 const struct archive_entry_header_ustar *header; 1257 int r; 1258 1259 (void)tar; /* UNUSED */ 1260 header = (const struct archive_entry_header_ustar *)h; 1261 size = tar_atol(header->size, sizeof(header->size)); 1262 if (size > entry_limit) { 1263 return (ARCHIVE_FATAL); 1264 } 1265 if ((size > (int64_t)pathname_limit) || (size < 0)) { 1266 archive_string_empty(as); 1267 int64_t to_consume = ((size + 511) & ~511); 1268 if (to_consume != __archive_read_consume(a, to_consume)) { 1269 return (ARCHIVE_FATAL); 1270 } 1271 archive_set_error(&a->archive, EINVAL, 1272 "Special header too large: %d > 1MiB", 1273 (int)size); 1274 return (ARCHIVE_WARN); 1275 } 1276 r = read_bytes_to_string(a, as, size, unconsumed); 1277 *unconsumed += 0x1ff & (-size); 1278 return(r); 1279 } 1280 1281 /* 1282 * Parse out common header elements. 1283 * 1284 * This would be the same as header_old_tar, except that the 1285 * filename is handled slightly differently for old and POSIX 1286 * entries (POSIX entries support a 'prefix'). This factoring 1287 * allows header_old_tar and header_ustar 1288 * to handle filenames differently, while still putting most of the 1289 * common parsing into one place. 
*/
/*
 * Fields already set on `entry` (perm, uid, gid, mtime, link target)
 * are left alone; the file type is always overwritten.
 *
 * Also initializes tar->entry_bytes_remaining, tar->realsize (unless
 * tar->realsize_override is set) and tar->filetype.
 *
 * Returns ARCHIVE_FATAL for a negative or oversized size field or a
 * fatal linkname conversion error; otherwise ARCHIVE_OK, or the
 * warning produced by a failed linkname conversion.
 */
static int
header_common(struct archive_read *a, struct tar *tar,
    struct archive_entry *entry, const void *h)
{
	const struct archive_entry_header_ustar *header;
	const char *existing_linkpath;
	const wchar_t *existing_wcs_linkpath;
	int err = ARCHIVE_OK;

	header = (const struct archive_entry_header_ustar *)h;

	/* Parse out the numeric fields (all are octal) */

	/* Split mode handling: Set filetype always, perm only if not already set */
	archive_entry_set_filetype(entry,
	    (mode_t)tar_atol(header->mode, sizeof(header->mode)));
	if (!archive_entry_perm_is_set(entry)) {
		archive_entry_set_perm(entry,
		    (mode_t)tar_atol(header->mode, sizeof(header->mode)));
	}
	if (!archive_entry_uid_is_set(entry)) {
		archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
	}
	if (!archive_entry_gid_is_set(entry)) {
		archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
	}

	/* Size of the body that follows this header; reject values that
	 * are negative or above entry_limit. */
	tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size));
	if (tar->entry_bytes_remaining < 0) {
		tar->entry_bytes_remaining = 0;
		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
		    "Tar entry has negative size");
		return (ARCHIVE_FATAL);
	}
	if (tar->entry_bytes_remaining > entry_limit) {
		tar->entry_bytes_remaining = 0;
		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
		    "Tar entry size overflow");
		return (ARCHIVE_FATAL);
	}
	/* The size reported on the entry is tar->realsize, which only
	 * follows the header size when no override is in effect. */
	if (!tar->realsize_override) {
		tar->realsize = tar->entry_bytes_remaining;
	}
	archive_entry_set_size(entry, tar->realsize);

	if (!archive_entry_mtime_is_set(entry)) {
		archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
	}

	/* Handle the tar type flag appropriately. */
	tar->filetype = header->typeflag[0];

	/*
	 * TODO: If the linkpath came from Pax extension header, then
	 * we should obey the hdrcharset_utf8 flag when converting these.
	 */
	switch (tar->filetype) {
	case '1': /* Hard link */
		archive_entry_set_link_to_hardlink(entry);
		existing_wcs_linkpath = archive_entry_hardlink_w(entry);
		existing_linkpath = archive_entry_hardlink(entry);
		/* Take the header's linkname only when the entry has no
		 * non-empty hardlink target yet (narrow or wide). */
		if ((existing_linkpath == NULL || existing_linkpath[0] == '\0')
		    && (existing_wcs_linkpath == NULL || existing_wcs_linkpath[0] == '\0')) {
			struct archive_string linkpath;
			archive_string_init(&linkpath);
			/* Copy through a string so the fixed-width field
			 * ends up null-terminated. */
			archive_strncpy(&linkpath,
			    header->linkname, sizeof(header->linkname));
			if (archive_entry_copy_hardlink_l(entry, linkpath.s,
			    archive_strlen(&linkpath), tar->sconv) != 0) {
				err = set_conversion_failed_error(a, tar->sconv,
				    "Linkname");
				if (err == ARCHIVE_FATAL) {
					archive_string_free(&linkpath);
					return (err);
				}
			}
			archive_string_free(&linkpath);
		}
		/*
		 * The following may seem odd, but: Technically, tar
		 * does not store the file type for a "hard link"
		 * entry, only the fact that it is a hard link.  So, I
		 * leave the type zero normally.  But, pax interchange
		 * format allows hard links to have data, which
		 * implies that the underlying entry is a regular
		 * file.
		 */
		if (archive_entry_size(entry) > 0)
			archive_entry_set_filetype(entry, AE_IFREG);

		/*
		 * A tricky point: Traditionally, tar readers have
		 * ignored the size field when reading hardlink
		 * entries, and some writers put non-zero sizes even
		 * though the body is empty.  POSIX blessed this
		 * convention in the 1988 standard, but broke with
		 * this tradition in 2001 by permitting hardlink
		 * entries to store valid bodies in pax interchange
		 * format, but not in ustar format.  Since there is no
		 * hard and fast way to distinguish pax interchange
		 * from earlier archives (the 'x' and 'g' entries are
		 * optional, after all), we need a heuristic.
		 */
		if (archive_entry_size(entry) == 0) {
			/* If the size is already zero, we're done. */
		} else if (a->archive.archive_format
		    == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
			/* Definitely pax extended; must obey hardlink size. */
		} else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR
		    || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
		{
			/* Old-style or GNU tar: we must ignore the size. */
			archive_entry_set_size(entry, 0);
			tar->entry_bytes_remaining = 0;
		} else if (archive_read_format_tar_bid(a, 50) > 50) {
			/*
			 * We don't know if it's pax: If the bid
			 * function sees a valid ustar header
			 * immediately following, then let's ignore
			 * the hardlink size.
			 */
			archive_entry_set_size(entry, 0);
			tar->entry_bytes_remaining = 0;
		}
		/*
		 * TODO: There are still two cases I'd like to handle:
		 *   = a ustar non-pax archive with a hardlink entry at
		 *     end-of-archive.  (Look for block of nulls following?)
		 *   = a pax archive that has not seen any pax headers
		 *     and has an entry which is a hardlink entry storing
		 *     a body containing an uncompressed tar archive.
		 * The first is worth addressing; I don't see any reliable
		 * way to deal with the second possibility.
		 */
		break;
	case '2': /* Symlink */
		archive_entry_set_link_to_symlink(entry);
		existing_wcs_linkpath = archive_entry_symlink_w(entry);
		existing_linkpath = archive_entry_symlink(entry);
		/* Same rule as for hardlinks: keep an existing target. */
		if ((existing_linkpath == NULL || existing_linkpath[0] == '\0')
		    && (existing_wcs_linkpath == NULL || existing_wcs_linkpath[0] == '\0')) {
			struct archive_string linkpath;
			archive_string_init(&linkpath);
			archive_strncpy(&linkpath,
			    header->linkname, sizeof(header->linkname));
			if (archive_entry_copy_symlink_l(entry, linkpath.s,
			    archive_strlen(&linkpath), tar->sconv) != 0) {
				err = set_conversion_failed_error(a, tar->sconv,
				    "Linkname");
				if (err == ARCHIVE_FATAL) {
					archive_string_free(&linkpath);
					return (err);
				}
			}
			archive_string_free(&linkpath);
		}
		/* Symlinks, devices, directories and FIFOs are forced to
		 * size zero with no body bytes remaining. */
		archive_entry_set_filetype(entry, AE_IFLNK);
		archive_entry_set_size(entry, 0);
		tar->entry_bytes_remaining = 0;
		break;
	case '3': /* Character device */
		archive_entry_set_filetype(entry, AE_IFCHR);
		archive_entry_set_size(entry, 0);
		tar->entry_bytes_remaining = 0;
		break;
	case '4': /* Block device */
		archive_entry_set_filetype(entry, AE_IFBLK);
		archive_entry_set_size(entry, 0);
		tar->entry_bytes_remaining = 0;
		break;
	case '5': /* Dir */
		archive_entry_set_filetype(entry, AE_IFDIR);
		archive_entry_set_size(entry, 0);
		tar->entry_bytes_remaining = 0;
		break;
	case '6': /* FIFO device */
		archive_entry_set_filetype(entry, AE_IFIFO);
		archive_entry_set_size(entry, 0);
		tar->entry_bytes_remaining = 0;
		break;
	case 'D': /* GNU incremental directory type */
		/*
		 * No special handling is actually required here.
		 * It might be nice someday to preprocess the file list and
		 * provide it to the client, though.
		 */
		archive_entry_set_filetype(entry, AE_IFDIR);
		break;
	case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/
		/*
		 * As far as I can tell, this is just like a regular file
		 * entry, except that the contents should be _appended_ to
		 * the indicated file at the indicated offset.  This may
		 * require some API work to fully support.
		 */
		break;
	case 'N': /* Old GNU "long filename" entry. */
		/* The body of this entry is a script for renaming
		 * previously-extracted entries.  Ugh.  It will never
		 * be supported by libarchive. */
		archive_entry_set_filetype(entry, AE_IFREG);
		break;
	case 'S': /* GNU sparse files */
		/*
		 * Sparse files are really just regular files with
		 * sparse information in the extended area.
		 */
		/* FALLTHROUGH */
	case '0': /* ustar "regular" file */
		/* FALLTHROUGH */
	default: /* Non-standard file types */
		/*
		 * Per POSIX: non-recognized types should always be
		 * treated as regular files.
		 */
		archive_entry_set_filetype(entry, AE_IFREG);
		break;
	}
	return (err);
}

/*
 * Parse out header elements for "old-style" tar archives.
 */
static int
header_old_tar(struct archive_read *a, struct tar *tar,
    struct archive_entry *entry, const void *h)
{
	const struct archive_entry_header_ustar *header;
	int err = ARCHIVE_OK, err2;

	/*
	 * Copy filename over (to ensure null termination).
	 * Skip if pathname was already set e.g.
by header_gnu_longname() 1525 */ 1526 header = (const struct archive_entry_header_ustar *)h; 1527 1528 const char *existing_pathname = archive_entry_pathname(entry); 1529 const wchar_t *existing_wcs_pathname = archive_entry_pathname_w(entry); 1530 if ((existing_pathname == NULL || existing_pathname[0] == '\0') 1531 && (existing_wcs_pathname == NULL || existing_wcs_pathname[0] == '\0') && 1532 archive_entry_copy_pathname_l(entry, 1533 header->name, sizeof(header->name), tar->sconv) != 0) { 1534 err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 1535 if (err == ARCHIVE_FATAL) 1536 return (err); 1537 } 1538 1539 /* Grab rest of common fields */ 1540 err2 = header_common(a, tar, entry, h); 1541 if (err > err2) 1542 err = err2; 1543 1544 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 1545 return (err); 1546 } 1547 1548 /* 1549 * Is this likely an AppleDouble extension? 1550 */ 1551 static int 1552 is_mac_metadata_entry(struct archive_entry *entry) { 1553 const char *p, *name; 1554 const wchar_t *wp, *wname; 1555 1556 wname = wp = archive_entry_pathname_w(entry); 1557 if (wp != NULL) { 1558 /* Find the last path element. */ 1559 for (; *wp != L'\0'; ++wp) { 1560 if (wp[0] == '/' && wp[1] != L'\0') 1561 wname = wp + 1; 1562 } 1563 /* 1564 * If last path element starts with "._", then 1565 * this is a Mac extension. 1566 */ 1567 if (wname[0] == L'.' && wname[1] == L'_' && wname[2] != L'\0') 1568 return 1; 1569 } else { 1570 /* Find the last path element. */ 1571 name = p = archive_entry_pathname(entry); 1572 if (p == NULL) 1573 return (ARCHIVE_FAILED); 1574 for (; *p != '\0'; ++p) { 1575 if (p[0] == '/' && p[1] != '\0') 1576 name = p + 1; 1577 } 1578 /* 1579 * If last path element starts with "._", then 1580 * this is a Mac extension. 1581 */ 1582 if (name[0] == '.' 
&& name[1] == '_' && name[2] != '\0') 1583 return 1; 1584 } 1585 /* Not a mac extension */ 1586 return 0; 1587 } 1588 1589 /* 1590 * Read a Mac AppleDouble-encoded blob of file metadata, 1591 * if there is one. 1592 * 1593 * TODO: In Libarchive 4, we should consider ripping this 1594 * out -- instead, return a file starting with `._` as 1595 * a regular file and let the client (or archive_write logic) 1596 * handle it. 1597 */ 1598 static int 1599 read_mac_metadata_blob(struct archive_read *a, 1600 struct archive_entry *entry, size_t *unconsumed) 1601 { 1602 int64_t size; 1603 size_t msize; 1604 const void *data; 1605 1606 /* Read the body as a Mac OS metadata blob. */ 1607 size = archive_entry_size(entry); 1608 msize = (size_t)size; 1609 if (size < 0 || (uintmax_t)msize != (uintmax_t)size) { 1610 *unconsumed = 0; 1611 return (ARCHIVE_FATAL); 1612 } 1613 1614 /* TODO: Should this merely skip the overlarge entry and 1615 * WARN? Or is xattr_limit sufficiently large that we can 1616 * safely assume anything larger is malicious? */ 1617 if (size > (int64_t)xattr_limit) { 1618 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1619 "Oversized AppleDouble extension has size %llu > %llu", 1620 (unsigned long long)size, 1621 (unsigned long long)xattr_limit); 1622 return (ARCHIVE_FATAL); 1623 } 1624 1625 /* 1626 * TODO: Look beyond the body here to peek at the next header. 1627 * If it's a regular header (not an extension header) 1628 * that has the wrong name, just return the current 1629 * entry as-is, without consuming the body here. 1630 * That would reduce the risk of us mis-identifying 1631 * an ordinary file that just happened to have 1632 * a name starting with "._". 1633 * 1634 * Q: Is the above idea really possible? Even 1635 * when there are GNU or pax extension entries? 
1636 */ 1637 tar_flush_unconsumed(a, unconsumed); 1638 data = __archive_read_ahead(a, msize, NULL); 1639 if (data == NULL) { 1640 archive_set_error(&a->archive, EINVAL, 1641 "Truncated archive" 1642 " detected while reading macOS metadata"); 1643 *unconsumed = 0; 1644 return (ARCHIVE_FATAL); 1645 } 1646 archive_entry_clear(entry); 1647 archive_entry_copy_mac_metadata(entry, data, msize); 1648 *unconsumed = (msize + 511) & ~ 511; 1649 return (ARCHIVE_OK); 1650 } 1651 1652 /* 1653 * Parse a file header for a pax extended archive entry. 1654 */ 1655 static int 1656 header_pax_global(struct archive_read *a, struct tar *tar, 1657 struct archive_entry *entry, const void *h, size_t *unconsumed) 1658 { 1659 const struct archive_entry_header_ustar *header; 1660 int64_t size, to_consume; 1661 1662 (void)a; /* UNUSED */ 1663 (void)tar; /* UNUSED */ 1664 (void)entry; /* UNUSED */ 1665 1666 header = (const struct archive_entry_header_ustar *)h; 1667 size = tar_atol(header->size, sizeof(header->size)); 1668 if (size > entry_limit) { 1669 return (ARCHIVE_FATAL); 1670 } 1671 to_consume = ((size + 511) & ~511); 1672 *unconsumed += to_consume; 1673 return (ARCHIVE_OK); 1674 } 1675 1676 /* 1677 * Parse a file header for a Posix "ustar" archive entry. This also 1678 * handles "pax" or "extended ustar" entries. 1679 * 1680 * In order to correctly handle pax attributes (which precede this), 1681 * we have to skip parsing any field for which the entry already has 1682 * contents. 1683 */ 1684 static int 1685 header_ustar(struct archive_read *a, struct tar *tar, 1686 struct archive_entry *entry, const void *h) 1687 { 1688 const struct archive_entry_header_ustar *header; 1689 struct archive_string as; 1690 int err = ARCHIVE_OK, r; 1691 1692 header = (const struct archive_entry_header_ustar *)h; 1693 1694 /* Copy name into an internal buffer to ensure null-termination. 
*/ 1695 const char *existing_pathname = archive_entry_pathname(entry); 1696 const wchar_t *existing_wcs_pathname = archive_entry_pathname_w(entry); 1697 if ((existing_pathname == NULL || existing_pathname[0] == '\0') 1698 && (existing_wcs_pathname == NULL || existing_wcs_pathname[0] == '\0')) { 1699 archive_string_init(&as); 1700 if (header->prefix[0]) { 1701 archive_strncpy(&as, header->prefix, sizeof(header->prefix)); 1702 if (as.s[archive_strlen(&as) - 1] != '/') 1703 archive_strappend_char(&as, '/'); 1704 archive_strncat(&as, header->name, sizeof(header->name)); 1705 } else { 1706 archive_strncpy(&as, header->name, sizeof(header->name)); 1707 } 1708 if (archive_entry_copy_pathname_l(entry, as.s, archive_strlen(&as), 1709 tar->sconv) != 0) { 1710 err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 1711 if (err == ARCHIVE_FATAL) 1712 return (err); 1713 } 1714 archive_string_free(&as); 1715 } 1716 1717 /* Handle rest of common fields. */ 1718 r = header_common(a, tar, entry, h); 1719 if (r == ARCHIVE_FATAL) 1720 return (r); 1721 if (r < err) 1722 err = r; 1723 1724 /* Handle POSIX ustar fields. */ 1725 const char *existing_uname = archive_entry_uname(entry); 1726 if (existing_uname == NULL || existing_uname[0] == '\0') { 1727 if (archive_entry_copy_uname_l(entry, 1728 header->uname, sizeof(header->uname), tar->sconv) != 0) { 1729 err = set_conversion_failed_error(a, tar->sconv, "Uname"); 1730 if (err == ARCHIVE_FATAL) 1731 return (err); 1732 } 1733 } 1734 1735 const char *existing_gname = archive_entry_gname(entry); 1736 if (existing_gname == NULL || existing_gname[0] == '\0') { 1737 if (archive_entry_copy_gname_l(entry, 1738 header->gname, sizeof(header->gname), tar->sconv) != 0) { 1739 err = set_conversion_failed_error(a, tar->sconv, "Gname"); 1740 if (err == ARCHIVE_FATAL) 1741 return (err); 1742 } 1743 } 1744 1745 /* Parse out device numbers only for char and block specials. 
*/ 1746 if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { 1747 if (!archive_entry_rdev_is_set(entry)) { 1748 archive_entry_set_rdevmajor(entry, (dev_t) 1749 tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); 1750 archive_entry_set_rdevminor(entry, (dev_t) 1751 tar_atol(header->rdevminor, sizeof(header->rdevminor))); 1752 } 1753 } else { 1754 archive_entry_set_rdev(entry, 0); 1755 } 1756 1757 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 1758 1759 return (err); 1760 } 1761 1762 static int 1763 header_pax_extension(struct archive_read *a, struct tar *tar, 1764 struct archive_entry *entry, const void *h, size_t *unconsumed) 1765 { 1766 /* Sanity checks: The largest `x` body I've ever heard of was 1767 * a little over 4MB. So I doubt there has ever been a 1768 * well-formed archive with an `x` body over 1GiB. Similarly, 1769 * it seems plausible that no single attribute has ever been 1770 * larger than 100MB. So if we see a larger value here, it's 1771 * almost certainly a sign of a corrupted/malicious archive. */ 1772 1773 /* Maximum sane size for extension body: 1 GiB */ 1774 /* This cannot be raised to larger than 8GiB without 1775 * exceeding the maximum size for a standard ustar 1776 * entry. */ 1777 const int64_t ext_size_limit = 1024 * 1024 * (int64_t)1024; 1778 /* Maximum size for a single line/attr: 100 million characters */ 1779 /* This cannot be raised to more than 2GiB without exceeding 1780 * a `size_t` on 32-bit platforms. */ 1781 const size_t max_parsed_line_length = 99999999ULL; 1782 /* Largest attribute prolog: size + name. 
*/ 1783 const size_t max_size_name = 512; 1784 1785 /* Size and padding of the full extension body */ 1786 int64_t ext_size, ext_padding; 1787 size_t line_length, value_length, name_length; 1788 ssize_t to_read, did_read; 1789 const struct archive_entry_header_ustar *header; 1790 const char *p, *attr_start, *name_start; 1791 struct archive_string_conv *sconv; 1792 struct archive_string *pas = NULL; 1793 struct archive_string attr_name; 1794 int err = ARCHIVE_OK, r; 1795 1796 header = (const struct archive_entry_header_ustar *)h; 1797 ext_size = tar_atol(header->size, sizeof(header->size)); 1798 if (ext_size > entry_limit) { 1799 return (ARCHIVE_FATAL); 1800 } 1801 if (ext_size < 0) { 1802 archive_set_error(&a->archive, EINVAL, 1803 "pax extension header has invalid size: %lld", 1804 (long long)ext_size); 1805 return (ARCHIVE_FATAL); 1806 } 1807 1808 ext_padding = 0x1ff & (-ext_size); 1809 if (ext_size > ext_size_limit) { 1810 /* Consume the pax extension body and return an error */ 1811 if (ext_size + ext_padding != __archive_read_consume(a, ext_size + ext_padding)) { 1812 return (ARCHIVE_FATAL); 1813 } 1814 archive_set_error(&a->archive, EINVAL, 1815 "Ignoring oversized pax extensions: %d > %d", 1816 (int)ext_size, (int)ext_size_limit); 1817 return (ARCHIVE_WARN); 1818 } 1819 tar_flush_unconsumed(a, unconsumed); 1820 1821 /* Parse the size/name of each pax attribute in the body */ 1822 archive_string_init(&attr_name); 1823 while (ext_size > 0) { 1824 /* Read enough bytes to parse the size/name of the next attribute */ 1825 to_read = max_size_name; 1826 if (to_read > ext_size) { 1827 to_read = ext_size; 1828 } 1829 p = __archive_read_ahead(a, to_read, &did_read); 1830 if (p == NULL) { /* EOF */ 1831 archive_set_error(&a->archive, EINVAL, 1832 "Truncated tar archive" 1833 " detected while reading pax attribute name"); 1834 return (ARCHIVE_FATAL); 1835 } 1836 if (did_read > ext_size) { 1837 did_read = ext_size; 1838 } 1839 1840 /* Parse size of attribute */ 1841 
line_length = 0; 1842 attr_start = p; 1843 while (1) { 1844 if (p >= attr_start + did_read) { 1845 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1846 "Ignoring malformed pax attributes: overlarge attribute size field"); 1847 *unconsumed += ext_size + ext_padding; 1848 return (ARCHIVE_WARN); 1849 } 1850 if (*p == ' ') { 1851 p++; 1852 break; 1853 } 1854 if (*p < '0' || *p > '9') { 1855 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1856 "Ignoring malformed pax attributes: malformed attribute size field"); 1857 *unconsumed += ext_size + ext_padding; 1858 return (ARCHIVE_WARN); 1859 } 1860 line_length *= 10; 1861 line_length += *p - '0'; 1862 if (line_length > max_parsed_line_length) { 1863 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1864 "Ignoring malformed pax attribute: size > %lld", 1865 (long long)max_parsed_line_length); 1866 *unconsumed += ext_size + ext_padding; 1867 return (ARCHIVE_WARN); 1868 } 1869 p++; 1870 } 1871 1872 if ((int64_t)line_length > ext_size) { 1873 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1874 "Ignoring malformed pax attribute: %lld > %lld", 1875 (long long)line_length, (long long)ext_size); 1876 *unconsumed += ext_size + ext_padding; 1877 return (ARCHIVE_WARN); 1878 } 1879 1880 /* Parse name of attribute */ 1881 if (p >= attr_start + did_read 1882 || p >= attr_start + line_length 1883 || *p == '=') { 1884 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1885 "Ignoring malformed pax attributes: empty name found"); 1886 *unconsumed += ext_size + ext_padding; 1887 return (ARCHIVE_WARN); 1888 } 1889 name_start = p; 1890 while (1) { 1891 if (p >= attr_start + did_read || p >= attr_start + line_length) { 1892 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1893 "Ignoring malformed pax attributes: overlarge attribute name"); 1894 *unconsumed += ext_size + ext_padding; 1895 return (ARCHIVE_WARN); 1896 } 1897 if (*p == '=') { 1898 break; 1899 } 1900 p++; 1901 } 1902 name_length = p - name_start; 1903 p++; // Skip '=' 
1904 1905 // Save the name before we consume it 1906 archive_strncpy(&attr_name, name_start, name_length); 1907 1908 ext_size -= p - attr_start; 1909 value_length = line_length - (p - attr_start); 1910 1911 /* Consume size, name, and `=` */ 1912 *unconsumed += p - attr_start; 1913 tar_flush_unconsumed(a, unconsumed); 1914 1915 /* pax_attribute will consume value_length - 1 */ 1916 r = pax_attribute(a, tar, entry, attr_name.s, archive_strlen(&attr_name), value_length - 1, unconsumed); 1917 ext_size -= value_length - 1; 1918 1919 // Release the allocated attr_name (either here or before every return in this function) 1920 archive_string_free(&attr_name); 1921 1922 if (r < ARCHIVE_WARN) { 1923 *unconsumed += ext_size + ext_padding; 1924 return (r); 1925 } 1926 err = err_combine(err, r); 1927 1928 /* Consume the `\n` that follows the pax attribute value. */ 1929 tar_flush_unconsumed(a, unconsumed); 1930 p = __archive_read_ahead(a, 1, &did_read); 1931 if (p == NULL) { 1932 archive_set_error(&a->archive, EINVAL, 1933 "Truncated tar archive" 1934 " detected while completing pax attribute"); 1935 return (ARCHIVE_FATAL); 1936 } 1937 if (p[0] != '\n') { 1938 archive_set_error(&a->archive, EINVAL, 1939 "Malformed pax attributes"); 1940 *unconsumed += ext_size + ext_padding; 1941 return (ARCHIVE_WARN); 1942 } 1943 ext_size -= 1; 1944 *unconsumed += 1; 1945 tar_flush_unconsumed(a, unconsumed); 1946 } 1947 *unconsumed += ext_size + ext_padding; 1948 1949 /* 1950 * Some PAX values -- pathname, linkpath, uname, gname -- 1951 * can't be copied into the entry until we know the character 1952 * set to use: 1953 */ 1954 if (!tar->pax_hdrcharset_utf8) 1955 /* PAX specified "BINARY", so use the default charset */ 1956 sconv = tar->opt_sconv; 1957 else { 1958 /* PAX default UTF-8 */ 1959 sconv = archive_string_conversion_from_charset( 1960 &(a->archive), "UTF-8", 1); 1961 if (sconv == NULL) 1962 return (ARCHIVE_FATAL); 1963 if (tar->compat_2x) 1964 
archive_string_conversion_set_opt(sconv, 1965 SCONV_SET_OPT_UTF8_LIBARCHIVE2X); 1966 } 1967 1968 /* Pathname */ 1969 pas = NULL; 1970 if (archive_strlen(&(tar->entry_pathname_override)) > 0) { 1971 /* Prefer GNU.sparse.name attribute if present */ 1972 /* GNU sparse files store a fake name under the standard 1973 * "pathname" key. */ 1974 pas = &(tar->entry_pathname_override); 1975 } else if (archive_strlen(&(tar->entry_pathname)) > 0) { 1976 /* Use standard "pathname" PAX extension */ 1977 pas = &(tar->entry_pathname); 1978 } 1979 if (pas != NULL) { 1980 if (archive_entry_copy_pathname_l(entry, pas->s, 1981 archive_strlen(pas), sconv) != 0) { 1982 err = set_conversion_failed_error(a, sconv, "Pathname"); 1983 if (err == ARCHIVE_FATAL) 1984 return (err); 1985 /* Use raw name without conversion */ 1986 archive_entry_copy_pathname(entry, pas->s); 1987 } 1988 } 1989 /* Uname */ 1990 if (archive_strlen(&(tar->entry_uname)) > 0) { 1991 if (archive_entry_copy_uname_l(entry, tar->entry_uname.s, 1992 archive_strlen(&(tar->entry_uname)), sconv) != 0) { 1993 err = set_conversion_failed_error(a, sconv, "Uname"); 1994 if (err == ARCHIVE_FATAL) 1995 return (err); 1996 /* Use raw name without conversion */ 1997 archive_entry_copy_uname(entry, tar->entry_uname.s); 1998 } 1999 } 2000 /* Gname */ 2001 if (archive_strlen(&(tar->entry_gname)) > 0) { 2002 if (archive_entry_copy_gname_l(entry, tar->entry_gname.s, 2003 archive_strlen(&(tar->entry_gname)), sconv) != 0) { 2004 err = set_conversion_failed_error(a, sconv, "Gname"); 2005 if (err == ARCHIVE_FATAL) 2006 return (err); 2007 /* Use raw name without conversion */ 2008 archive_entry_copy_gname(entry, tar->entry_gname.s); 2009 } 2010 } 2011 /* Linkpath */ 2012 if (archive_strlen(&(tar->entry_linkpath)) > 0) { 2013 if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s, 2014 archive_strlen(&(tar->entry_linkpath)), sconv) != 0) { 2015 err = set_conversion_failed_error(a, sconv, "Linkpath"); 2016 if (err == ARCHIVE_FATAL) 2017 
/*
 * Decode one "LIBARCHIVE.xattr.<name>" attribute and attach it to the entry.
 *
 * `name` (name_length bytes) is passed through url_decode() and `value`
 * (value_length bytes) through base64_decode(); the decoded pair is handed
 * to archive_entry_xattr_add_entry() and the temporary buffers are freed.
 *
 * Returns 0 on success, 3 for an empty name, 2 if url_decode() returns
 * NULL and 1 if base64_decode() returns NULL.
 */
static int
pax_attribute_LIBARCHIVE_xattr(struct archive_entry *entry,
	const char *name, size_t name_length, const char *value, size_t value_length)
{
	char *name_decoded;
	void *value_decoded;
	size_t value_len;

	if (name_length < 1)
		return 3;

	/* URL-decode name */
	name_decoded = url_decode(name, name_length);
	if (name_decoded == NULL)
		return 2;

	/* Base-64 decode value */
	value_decoded = base64_decode(value, value_length, &value_len);
	if (value_decoded == NULL) {
		free(name_decoded);
		return 1;
	}

	archive_entry_xattr_add_entry(entry, name_decoded,
		value_decoded, value_len);

	free(name_decoded);
	free(value_decoded);
	return 0;
}

/*
 * Attach one "SCHILY.xattr.<name>" attribute to the entry.
 *
 * `name` is not NUL-terminated, so a terminated copy is made for
 * archive_entry_xattr_add_entry(); `value` is passed through unmodified.
 *
 * Returns 0 on success, 1 if the name is empty or longer than 128 bytes,
 * and 2 if the temporary name copy cannot be allocated.  The caller treats
 * any non-zero result as a warning, so an allocation failure is now
 * reported instead of silently dropping the attribute.
 */
static int
pax_attribute_SCHILY_xattr(struct archive_entry *entry,
	const char *name, size_t name_length, const char *value, size_t value_length)
{
	char *null_terminated_name;

	if (name_length < 1 || name_length > 128) {
		return 1;
	}

	null_terminated_name = malloc(name_length + 1);
	if (null_terminated_name == NULL) {
		/* Out of memory: tell the caller the xattr was not stored. */
		return 2;
	}

	memcpy(null_terminated_name, name, name_length);
	null_terminated_name[name_length] = '\0';
	archive_entry_xattr_add_entry(entry, null_terminated_name, value, value_length);
	free(null_terminated_name);

	return 0;
}

/*
 * Store the value of an "RHT.security.selinux" attribute on the entry
 * under the xattr name "security.selinux".  Always returns 0.
 */
static int
pax_attribute_RHT_security_selinux(struct archive_entry *entry,
	const char *value, size_t value_length)
{
	archive_entry_xattr_add_entry(entry, "security.selinux",
            value, value_length);

	return 0;
}
2087 } 2088 2089 static int 2090 pax_attribute_SCHILY_acl(struct archive_read *a, struct tar *tar, 2091 struct archive_entry *entry, size_t value_length, int type) 2092 { 2093 int r; 2094 const char *p; 2095 const char* errstr; 2096 2097 switch (type) { 2098 case ARCHIVE_ENTRY_ACL_TYPE_ACCESS: 2099 errstr = "SCHILY.acl.access"; 2100 break; 2101 case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT: 2102 errstr = "SCHILY.acl.default"; 2103 break; 2104 case ARCHIVE_ENTRY_ACL_TYPE_NFS4: 2105 errstr = "SCHILY.acl.ace"; 2106 break; 2107 default: 2108 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 2109 "Unknown ACL type: %d", type); 2110 return(ARCHIVE_FATAL); 2111 } 2112 2113 if (tar->sconv_acl == NULL) { 2114 tar->sconv_acl = 2115 archive_string_conversion_from_charset( 2116 &(a->archive), "UTF-8", 1); 2117 if (tar->sconv_acl == NULL) 2118 return (ARCHIVE_FATAL); 2119 } 2120 2121 if (value_length > acl_limit) { 2122 __archive_read_consume(a, value_length); 2123 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 2124 "Unreasonably large ACL: %d > %d", 2125 (int)value_length, (int)acl_limit); 2126 return (ARCHIVE_WARN); 2127 } 2128 2129 p = __archive_read_ahead(a, value_length, NULL); 2130 if (p == NULL) { 2131 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2132 "Truncated tar archive " 2133 "detected while reading ACL data"); 2134 return (ARCHIVE_FATAL); 2135 } 2136 2137 r = archive_acl_from_text_nl(archive_entry_acl(entry), p, value_length, 2138 type, tar->sconv_acl); 2139 __archive_read_consume(a, value_length); 2140 /* Workaround: Force perm_is_set() to be correct */ 2141 /* If this bit were stored in the ACL, this wouldn't be needed */ 2142 archive_entry_set_perm(entry, archive_entry_perm(entry)); 2143 if (r != ARCHIVE_OK) { 2144 if (r == ARCHIVE_FATAL) { 2145 archive_set_error(&a->archive, ENOMEM, 2146 "%s %s", "Can't allocate memory for ", 2147 errstr); 2148 return (r); 2149 } 2150 archive_set_error(&a->archive, 2151 ARCHIVE_ERRNO_MISC, "%s %s", "Parse error: ", 
errstr); 2152 } 2153 return (r); 2154 } 2155 2156 static int 2157 pax_attribute_read_time(struct archive_read *a, size_t value_length, int64_t *ps, long *pn, size_t *unconsumed) { 2158 struct archive_string as; 2159 int r; 2160 2161 if (value_length > 128) { 2162 __archive_read_consume(a, value_length); 2163 *ps = 0; 2164 *pn = 0; 2165 return (ARCHIVE_FATAL); 2166 } 2167 2168 archive_string_init(&as); 2169 r = read_bytes_to_string(a, &as, value_length, unconsumed); 2170 if (r < ARCHIVE_OK) { 2171 archive_string_free(&as); 2172 return (r); 2173 } 2174 2175 pax_time(as.s, archive_strlen(&as), ps, pn); 2176 archive_string_free(&as); 2177 if (*ps < 0 || *ps == INT64_MAX) { 2178 return (ARCHIVE_WARN); 2179 } 2180 return (ARCHIVE_OK); 2181 } 2182 2183 static int 2184 pax_attribute_read_number(struct archive_read *a, size_t value_length, int64_t *result) { 2185 struct archive_string as; 2186 size_t unconsumed = 0; 2187 int r; 2188 2189 if (value_length > 64) { 2190 __archive_read_consume(a, value_length); 2191 *result = 0; 2192 return (ARCHIVE_FATAL); 2193 } 2194 2195 archive_string_init(&as); 2196 r = read_bytes_to_string(a, &as, value_length, &unconsumed); 2197 tar_flush_unconsumed(a, &unconsumed); 2198 if (r < ARCHIVE_OK) { 2199 archive_string_free(&as); 2200 return (r); 2201 } 2202 2203 *result = tar_atol10(as.s, archive_strlen(&as)); 2204 archive_string_free(&as); 2205 if (*result < 0 || *result == INT64_MAX) { 2206 *result = INT64_MAX; 2207 return (ARCHIVE_WARN); 2208 } 2209 return (ARCHIVE_OK); 2210 } 2211 2212 /* 2213 * Parse a single key=value attribute. 2214 * 2215 * POSIX reserves all-lowercase keywords. Vendor-specific extensions 2216 * should always have keywords of the form "VENDOR.attribute" In 2217 * particular, it's quite feasible to support many different vendor 2218 * extensions here. I'm using "LIBARCHIVE" for extensions unique to 2219 * this library. 
2220 * 2221 * TODO: Investigate other vendor-specific extensions and see if 2222 * any of them look useful. 2223 */ 2224 static int 2225 pax_attribute(struct archive_read *a, struct tar *tar, struct archive_entry *entry, 2226 const char *key, size_t key_length, size_t value_length, size_t *unconsumed) 2227 { 2228 int64_t t; 2229 long n; 2230 const char *p; 2231 ssize_t bytes_read; 2232 int err = ARCHIVE_OK; 2233 2234 switch (key[0]) { 2235 case 'G': 2236 /* GNU.* extensions */ 2237 if (key_length > 4 && memcmp(key, "GNU.", 4) == 0) { 2238 key += 4; 2239 key_length -= 4; 2240 2241 /* GNU.sparse marks the existence of GNU sparse information */ 2242 if (key_length == 6 && memcmp(key, "sparse", 6) == 0) { 2243 tar->sparse_gnu_attributes_seen = 1; 2244 } 2245 2246 /* GNU.sparse.* extensions */ 2247 else if (key_length > 7 && memcmp(key, "sparse.", 7) == 0) { 2248 tar->sparse_gnu_attributes_seen = 1; 2249 key += 7; 2250 key_length -= 7; 2251 2252 /* GNU "0.0" sparse pax format. */ 2253 if (key_length == 9 && memcmp(key, "numblocks", 9) == 0) { 2254 /* GNU.sparse.numblocks */ 2255 tar->sparse_offset = -1; 2256 tar->sparse_numbytes = -1; 2257 tar->sparse_gnu_major = 0; 2258 tar->sparse_gnu_minor = 0; 2259 } 2260 else if (key_length == 6 && memcmp(key, "offset", 6) == 0) { 2261 /* GNU.sparse.offset */ 2262 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { 2263 tar->sparse_offset = t; 2264 if (tar->sparse_numbytes != -1) { 2265 if (gnu_add_sparse_entry(a, tar, 2266 tar->sparse_offset, tar->sparse_numbytes) 2267 != ARCHIVE_OK) 2268 return (ARCHIVE_FATAL); 2269 tar->sparse_offset = -1; 2270 tar->sparse_numbytes = -1; 2271 } 2272 } 2273 return (err); 2274 } 2275 else if (key_length == 8 && memcmp(key, "numbytes", 8) == 0) { 2276 /* GNU.sparse.numbytes */ 2277 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { 2278 tar->sparse_numbytes = t; 2279 if (tar->sparse_offset != -1) { 2280 if (gnu_add_sparse_entry(a, tar, 2281 
tar->sparse_offset, tar->sparse_numbytes) 2282 != ARCHIVE_OK) 2283 return (ARCHIVE_FATAL); 2284 tar->sparse_offset = -1; 2285 tar->sparse_numbytes = -1; 2286 } 2287 } 2288 return (err); 2289 } 2290 else if (key_length == 4 && memcmp(key, "size", 4) == 0) { 2291 /* GNU.sparse.size */ 2292 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { 2293 tar->realsize = t; 2294 archive_entry_set_size(entry, tar->realsize); 2295 tar->realsize_override = 1; 2296 } 2297 return (err); 2298 } 2299 2300 /* GNU "0.1" sparse pax format. */ 2301 else if (key_length == 3 && memcmp(key, "map", 3) == 0) { 2302 /* GNU.sparse.map */ 2303 tar->sparse_gnu_major = 0; 2304 tar->sparse_gnu_minor = 1; 2305 if (value_length > sparse_map_limit) { 2306 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 2307 "Unreasonably large sparse map: %d > %d", 2308 (int)value_length, (int)sparse_map_limit); 2309 err = ARCHIVE_FAILED; 2310 } else { 2311 p = __archive_read_ahead(a, value_length, &bytes_read); 2312 if (p == NULL) { 2313 archive_set_error(&a->archive, EINVAL, 2314 "Truncated archive" 2315 " detected while reading GNU sparse data"); 2316 return (ARCHIVE_FATAL); 2317 } 2318 if (gnu_sparse_01_parse(a, tar, p, value_length) != ARCHIVE_OK) { 2319 err = ARCHIVE_WARN; 2320 } 2321 } 2322 __archive_read_consume(a, value_length); 2323 return (err); 2324 } 2325 2326 /* GNU "1.0" sparse pax format */ 2327 else if (key_length == 5 && memcmp(key, "major", 5) == 0) { 2328 /* GNU.sparse.major */ 2329 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK 2330 && t >= 0 2331 && t <= 10) { 2332 tar->sparse_gnu_major = (int)t; 2333 } 2334 return (err); 2335 } 2336 else if (key_length == 5 && memcmp(key, "minor", 5) == 0) { 2337 /* GNU.sparse.minor */ 2338 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK 2339 && t >= 0 2340 && t <= 10) { 2341 tar->sparse_gnu_minor = (int)t; 2342 } 2343 return (err); 2344 } 2345 else if (key_length == 4 && memcmp(key, 
"name", 4) == 0) { 2346 /* GNU.sparse.name */ 2347 /* 2348 * The real filename; when storing sparse 2349 * files, GNU tar puts a synthesized name into 2350 * the regular 'path' attribute in an attempt 2351 * to limit confusion. ;-) 2352 */ 2353 if (value_length > pathname_limit) { 2354 *unconsumed += value_length; 2355 err = ARCHIVE_WARN; 2356 } else { 2357 err = read_bytes_to_string(a, &(tar->entry_pathname_override), 2358 value_length, unconsumed); 2359 } 2360 return (err); 2361 } 2362 else if (key_length == 8 && memcmp(key, "realsize", 8) == 0) { 2363 /* GNU.sparse.realsize */ 2364 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { 2365 tar->realsize = t; 2366 archive_entry_set_size(entry, tar->realsize); 2367 tar->realsize_override = 1; 2368 } 2369 return (err); 2370 } 2371 } 2372 } 2373 break; 2374 case 'L': 2375 /* LIBARCHIVE extensions */ 2376 if (key_length > 11 && memcmp(key, "LIBARCHIVE.", 11) == 0) { 2377 key_length -= 11; 2378 key += 11; 2379 2380 /* TODO: Handle arbitrary extended attributes... 
*/ 2381 /* 2382 if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0) 2383 archive_entry_set_xxxxxx(entry, value); 2384 */ 2385 if (key_length == 12 && memcmp(key, "creationtime", 12) == 0) { 2386 /* LIBARCHIVE.creationtime */ 2387 if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) { 2388 archive_entry_set_birthtime(entry, t, n); 2389 } 2390 return (err); 2391 } 2392 else if (key_length == 11 && memcmp(key, "symlinktype", 11) == 0) { 2393 /* LIBARCHIVE.symlinktype */ 2394 if (value_length < 16) { 2395 p = __archive_read_ahead(a, value_length, &bytes_read); 2396 if (p == NULL) { 2397 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2398 "Truncated tar archive " 2399 "detected while reading `symlinktype` attribute"); 2400 return (ARCHIVE_FATAL); 2401 } 2402 if (value_length == 4 && memcmp(p, "file", 4) == 0) { 2403 archive_entry_set_symlink_type(entry, 2404 AE_SYMLINK_TYPE_FILE); 2405 } else if (value_length == 3 && memcmp(p, "dir", 3) == 0) { 2406 archive_entry_set_symlink_type(entry, 2407 AE_SYMLINK_TYPE_DIRECTORY); 2408 } else { 2409 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 2410 "Unrecognized symlink type"); 2411 err = ARCHIVE_WARN; 2412 } 2413 } else { 2414 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 2415 "symlink type is very long" 2416 "(longest recognized value is 4 bytes, this is %d)", 2417 (int)value_length); 2418 err = ARCHIVE_WARN; 2419 } 2420 __archive_read_consume(a, value_length); 2421 return (err); 2422 } 2423 else if (key_length > 6 && memcmp(key, "xattr.", 6) == 0) { 2424 key_length -= 6; 2425 key += 6; 2426 if (value_length > xattr_limit) { 2427 err = ARCHIVE_WARN; 2428 } else { 2429 p = __archive_read_ahead(a, value_length, &bytes_read); 2430 if (p == NULL) { 2431 archive_set_error(&a->archive, EINVAL, 2432 "Truncated archive" 2433 " detected while reading xattr information"); 2434 return (ARCHIVE_FATAL); 2435 } 2436 if (pax_attribute_LIBARCHIVE_xattr(entry, key, key_length, p, value_length)) { 
2437 /* TODO: Unable to parse xattr */ 2438 err = ARCHIVE_WARN; 2439 } 2440 } 2441 __archive_read_consume(a, value_length); 2442 return (err); 2443 } 2444 } 2445 break; 2446 case 'R': 2447 /* GNU tar uses RHT.security header to store SELinux xattrs 2448 * SCHILY.xattr.security.selinux == RHT.security.selinux */ 2449 if (key_length == 20 && memcmp(key, "RHT.security.selinux", 20) == 0) { 2450 if (value_length > xattr_limit) { 2451 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 2452 "Ignoring unreasonably large security.selinux attribute:" 2453 " %d > %d", 2454 (int)value_length, (int)xattr_limit); 2455 /* TODO: Should this be FAILED instead? */ 2456 err = ARCHIVE_WARN; 2457 } else { 2458 p = __archive_read_ahead(a, value_length, &bytes_read); 2459 if (p == NULL) { 2460 archive_set_error(&a->archive, EINVAL, 2461 "Truncated archive" 2462 " detected while reading selinux data"); 2463 return (ARCHIVE_FATAL); 2464 } 2465 if (pax_attribute_RHT_security_selinux(entry, p, value_length)) { 2466 /* TODO: Unable to parse xattr */ 2467 err = ARCHIVE_WARN; 2468 } 2469 } 2470 __archive_read_consume(a, value_length); 2471 return (err); 2472 } 2473 break; 2474 case 'S': 2475 /* SCHILY.* extensions used by "star" archiver */ 2476 if (key_length > 7 && memcmp(key, "SCHILY.", 7) == 0) { 2477 key_length -= 7; 2478 key += 7; 2479 2480 if (key_length == 10 && memcmp(key, "acl.access", 10) == 0) { 2481 err = pax_attribute_SCHILY_acl(a, tar, entry, value_length, 2482 ARCHIVE_ENTRY_ACL_TYPE_ACCESS); 2483 // TODO: Mark mode as set 2484 return (err); 2485 } 2486 else if (key_length == 11 && memcmp(key, "acl.default", 11) == 0) { 2487 err = pax_attribute_SCHILY_acl(a, tar, entry, value_length, 2488 ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); 2489 return (err); 2490 } 2491 else if (key_length == 7 && memcmp(key, "acl.ace", 7) == 0) { 2492 err = pax_attribute_SCHILY_acl(a, tar, entry, value_length, 2493 ARCHIVE_ENTRY_ACL_TYPE_NFS4); 2494 // TODO: Mark mode as set 2495 return (err); 2496 } 2497 else if 
(key_length == 8 && memcmp(key, "devmajor", 8) == 0) { 2498 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { 2499 archive_entry_set_rdevmajor(entry, (dev_t)t); 2500 } 2501 return (err); 2502 } 2503 else if (key_length == 8 && memcmp(key, "devminor", 8) == 0) { 2504 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { 2505 archive_entry_set_rdevminor(entry, (dev_t)t); 2506 } 2507 return (err); 2508 } 2509 else if (key_length == 6 && memcmp(key, "fflags", 6) == 0) { 2510 if (value_length < fflags_limit) { 2511 p = __archive_read_ahead(a, value_length, &bytes_read); 2512 if (p == NULL) { 2513 /* Truncated archive */ 2514 archive_set_error(&a->archive, EINVAL, 2515 "Truncated archive" 2516 " detected while reading SCHILY.fflags"); 2517 return (ARCHIVE_FATAL); 2518 } 2519 archive_entry_copy_fflags_text_len(entry, p, value_length); 2520 err = ARCHIVE_OK; 2521 } else { 2522 /* Overlong fflags field */ 2523 err = ARCHIVE_WARN; 2524 } 2525 __archive_read_consume(a, value_length); 2526 return (err); 2527 } 2528 else if (key_length == 3 && memcmp(key, "dev", 3) == 0) { 2529 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { 2530 archive_entry_set_dev(entry, (dev_t)t); 2531 } 2532 return (err); 2533 } 2534 else if (key_length == 3 && memcmp(key, "ino", 3) == 0) { 2535 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { 2536 archive_entry_set_ino(entry, t); 2537 } 2538 return (err); 2539 } 2540 else if (key_length == 5 && memcmp(key, "nlink", 5) == 0) { 2541 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { 2542 archive_entry_set_nlink(entry, (unsigned int)t); 2543 } 2544 return (err); 2545 } 2546 else if (key_length == 8 && memcmp(key, "realsize", 8) == 0) { 2547 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { 2548 tar->realsize = t; 2549 tar->realsize_override = 1; 2550 archive_entry_set_size(entry, tar->realsize); 2551 } 
2552 return (err); 2553 } 2554 else if (key_length > 6 && memcmp(key, "xattr.", 6) == 0) { 2555 key_length -= 6; 2556 key += 6; 2557 if (value_length < xattr_limit) { 2558 p = __archive_read_ahead(a, value_length, &bytes_read); 2559 if (p == NULL) { 2560 archive_set_error(&a->archive, EINVAL, 2561 "Truncated archive" 2562 " detected while reading SCHILY.xattr"); 2563 return (ARCHIVE_FATAL); 2564 } 2565 if (pax_attribute_SCHILY_xattr(entry, key, key_length, p, value_length)) { 2566 /* TODO: Unable to parse xattr */ 2567 err = ARCHIVE_WARN; 2568 } 2569 } else { 2570 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 2571 "Unreasonably large xattr: %d > %d", 2572 (int)value_length, (int)xattr_limit); 2573 err = ARCHIVE_WARN; 2574 } 2575 __archive_read_consume(a, value_length); 2576 return (err); 2577 } 2578 } 2579 /* SUN.* extensions from Solaris tar */ 2580 if (key_length > 4 && memcmp(key, "SUN.", 4) == 0) { 2581 key_length -= 4; 2582 key += 4; 2583 2584 if (key_length == 9 && memcmp(key, "holesdata", 9) == 0) { 2585 /* SUN.holesdata */ 2586 if (value_length < sparse_map_limit) { 2587 p = __archive_read_ahead(a, value_length, &bytes_read); 2588 if (p == NULL) { 2589 archive_set_error(&a->archive, EINVAL, 2590 "Truncated archive" 2591 " detected while reading SUN.holesdata"); 2592 return (ARCHIVE_FATAL); 2593 } 2594 err = pax_attribute_SUN_holesdata(a, tar, entry, p, value_length); 2595 if (err < ARCHIVE_OK) { 2596 archive_set_error(&a->archive, 2597 ARCHIVE_ERRNO_MISC, 2598 "Parse error: SUN.holesdata"); 2599 } 2600 } else { 2601 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 2602 "Unreasonably large sparse map: %d > %d", 2603 (int)value_length, (int)sparse_map_limit); 2604 err = ARCHIVE_FAILED; 2605 } 2606 __archive_read_consume(a, value_length); 2607 return (err); 2608 } 2609 } 2610 break; 2611 case 'a': 2612 if (key_length == 5 && memcmp(key, "atime", 5) == 0) { 2613 if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) { 
2614 archive_entry_set_atime(entry, t, n); 2615 } 2616 return (err); 2617 } 2618 break; 2619 case 'c': 2620 if (key_length == 5 && memcmp(key, "ctime", 5) == 0) { 2621 if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) { 2622 archive_entry_set_ctime(entry, t, n); 2623 } 2624 return (err); 2625 } else if (key_length == 7 && memcmp(key, "charset", 7) == 0) { 2626 /* TODO: Publish charset information in entry. */ 2627 } else if (key_length == 7 && memcmp(key, "comment", 7) == 0) { 2628 /* TODO: Publish comment in entry. */ 2629 } 2630 break; 2631 case 'g': 2632 if (key_length == 3 && memcmp(key, "gid", 3) == 0) { 2633 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { 2634 archive_entry_set_gid(entry, t); 2635 } 2636 return (err); 2637 } else if (key_length == 5 && memcmp(key, "gname", 5) == 0) { 2638 if (value_length > guname_limit) { 2639 *unconsumed += value_length; 2640 err = ARCHIVE_WARN; 2641 } else { 2642 err = read_bytes_to_string(a, &(tar->entry_gname), value_length, unconsumed); 2643 } 2644 return (err); 2645 } 2646 break; 2647 case 'h': 2648 if (key_length == 10 && memcmp(key, "hdrcharset", 10) == 0) { 2649 if (value_length < 64) { 2650 p = __archive_read_ahead(a, value_length, &bytes_read); 2651 if (p == NULL) { 2652 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2653 "Truncated tar archive " 2654 "detected while reading hdrcharset attribute"); 2655 return (ARCHIVE_FATAL); 2656 } 2657 if (value_length == 6 2658 && memcmp(p, "BINARY", 6) == 0) { 2659 /* Binary mode. 
*/ 2660 tar->pax_hdrcharset_utf8 = 0; 2661 err = ARCHIVE_OK; 2662 } else if (value_length == 23 2663 && memcmp(p, "ISO-IR 10646 2000 UTF-8", 23) == 0) { 2664 tar->pax_hdrcharset_utf8 = 1; 2665 err = ARCHIVE_OK; 2666 } else { 2667 /* TODO: Unrecognized character set */ 2668 err = ARCHIVE_WARN; 2669 } 2670 } else { 2671 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2672 "hdrcharset attribute is unreasonably large (%d bytes)", 2673 (int)value_length); 2674 err = ARCHIVE_WARN; 2675 } 2676 __archive_read_consume(a, value_length); 2677 return (err); 2678 } 2679 break; 2680 case 'l': 2681 /* pax interchange doesn't distinguish hardlink vs. symlink. */ 2682 if (key_length == 8 && memcmp(key, "linkpath", 8) == 0) { 2683 if (value_length > pathname_limit) { 2684 *unconsumed += value_length; 2685 err = ARCHIVE_WARN; 2686 } else { 2687 err = read_bytes_to_string(a, &tar->entry_linkpath, value_length, unconsumed); 2688 } 2689 return (err); 2690 } 2691 break; 2692 case 'm': 2693 if (key_length == 5 && memcmp(key, "mtime", 5) == 0) { 2694 if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) { 2695 archive_entry_set_mtime(entry, t, n); 2696 } 2697 return (err); 2698 } 2699 break; 2700 case 'p': 2701 if (key_length == 4 && memcmp(key, "path", 4) == 0) { 2702 if (value_length > pathname_limit) { 2703 *unconsumed += value_length; 2704 err = ARCHIVE_WARN; 2705 } else { 2706 err = read_bytes_to_string(a, &(tar->entry_pathname), value_length, unconsumed); 2707 } 2708 return (err); 2709 } 2710 break; 2711 case 'r': 2712 /* POSIX has reserved 'realtime.*' */ 2713 break; 2714 case 's': 2715 /* POSIX has reserved 'security.*' */ 2716 /* Someday: if (strcmp(key, "security.acl") == 0) { ... } */ 2717 if (key_length == 4 && memcmp(key, "size", 4) == 0) { 2718 /* "size" is the size of the data in the entry. 
*/ 2719 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { 2720 tar->entry_bytes_remaining = t; 2721 /* 2722 * The "size" pax header keyword always overrides the 2723 * "size" field in the tar header. 2724 * GNU.sparse.realsize, GNU.sparse.size and 2725 * SCHILY.realsize override this value. 2726 */ 2727 if (!tar->realsize_override) { 2728 archive_entry_set_size(entry, 2729 tar->entry_bytes_remaining); 2730 tar->realsize 2731 = tar->entry_bytes_remaining; 2732 } 2733 } 2734 else if (t == INT64_MAX) { 2735 /* Note: pax_attr_read_number returns INT64_MAX on overflow or < 0 */ 2736 tar->entry_bytes_remaining = 0; 2737 archive_set_error(&a->archive, 2738 ARCHIVE_ERRNO_MISC, 2739 "Tar size attribute overflow"); 2740 return (ARCHIVE_FATAL); 2741 } 2742 return (err); 2743 } 2744 break; 2745 case 'u': 2746 if (key_length == 3 && memcmp(key, "uid", 3) == 0) { 2747 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { 2748 archive_entry_set_uid(entry, t); 2749 } 2750 return (err); 2751 } else if (key_length == 5 && memcmp(key, "uname", 5) == 0) { 2752 if (value_length > guname_limit) { 2753 *unconsumed += value_length; 2754 err = ARCHIVE_WARN; 2755 } else { 2756 err = read_bytes_to_string(a, &(tar->entry_uname), value_length, unconsumed); 2757 } 2758 return (err); 2759 } 2760 break; 2761 } 2762 2763 /* Unrecognized key, just skip the entire value. 
/*
 * Parse a decimal time value, which may include a fractional portion.
 *
 * p/length describe the text (it need not be NUL-terminated).  The
 * accepted form is an optional '-', a run of digits, and optionally a
 * '.' followed by up to nine significant fractional digits; further
 * fractional digits are ignored.
 *
 * On return *ps holds the whole seconds (with sign applied) and *pn the
 * fraction in nanoseconds.  Both outputs are always written: empty input
 * yields 0/0.  If the digits do not fit in int64_t the seconds saturate
 * at INT64_MAX (negated when a '-' was present) and no fraction is parsed.
 * The fraction is always stored as a non-negative count.
 */
static void
pax_time(const char *p, size_t length, int64_t *ps, long *pn)
{
	char digit;
	int64_t	s;
	unsigned long l;
	int sign;
	int64_t limit, last_digit_limit;

	limit = INT64_MAX / 10;
	last_digit_limit = INT64_MAX % 10;

	/*
	 * Give both outputs a defined value before any early return;
	 * callers pass uninitialized locals and use both on success.
	 */
	*ps = 0;
	*pn = 0;

	if (length == 0) {
		return;
	}
	s = 0;
	sign = 1;
	if (*p == '-') {
		sign = -1;
		p++;
		length--;
	}
	while (length > 0 && *p >= '0' && *p <= '9') {
		digit = *p - '0';
		/* Stop before s * 10 + digit would exceed INT64_MAX. */
		if (s > limit ||
		    (s == limit && digit > last_digit_limit)) {
			s = INT64_MAX;
			break;
		}
		s = (s * 10) + digit;
		++p;
		--length;
	}

	*ps = s * sign;

	/* Calculate nanoseconds. */
	if (length == 0 || *p != '.')
		return;

	/* l is the weight of the next fractional digit, in nanoseconds. */
	l = 100000000UL;
	do {
		++p;
		--length;
		if (length > 0 && *p >= '0' && *p <= '9')
			*pn += (*p - '0') * l;
		else
			break;
	} while (l /= 10);
}
*/ 2850 header = (const struct archive_entry_header_gnutar *)h; 2851 const char *existing_pathname = archive_entry_pathname(entry); 2852 if (existing_pathname == NULL || existing_pathname[0] == '\0') { 2853 if (archive_entry_copy_pathname_l(entry, 2854 header->name, sizeof(header->name), tar->sconv) != 0) { 2855 err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 2856 if (err == ARCHIVE_FATAL) 2857 return (err); 2858 } 2859 } 2860 2861 /* Fields common to ustar and GNU */ 2862 /* XXX Can the following be factored out since it's common 2863 * to ustar and gnu tar? Is it okay to move it down into 2864 * header_common, perhaps? */ 2865 const char *existing_uname = archive_entry_uname(entry); 2866 if (existing_uname == NULL || existing_uname[0] == '\0') { 2867 if (archive_entry_copy_uname_l(entry, 2868 header->uname, sizeof(header->uname), tar->sconv) != 0) { 2869 err = set_conversion_failed_error(a, tar->sconv, "Uname"); 2870 if (err == ARCHIVE_FATAL) 2871 return (err); 2872 } 2873 } 2874 2875 const char *existing_gname = archive_entry_gname(entry); 2876 if (existing_gname == NULL || existing_gname[0] == '\0') { 2877 if (archive_entry_copy_gname_l(entry, 2878 header->gname, sizeof(header->gname), tar->sconv) != 0) { 2879 err = set_conversion_failed_error(a, tar->sconv, "Gname"); 2880 if (err == ARCHIVE_FATAL) 2881 return (err); 2882 } 2883 } 2884 2885 /* Parse out device numbers only for char and block specials */ 2886 if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { 2887 if (!archive_entry_rdev_is_set(entry)) { 2888 archive_entry_set_rdevmajor(entry, (dev_t) 2889 tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); 2890 archive_entry_set_rdevminor(entry, (dev_t) 2891 tar_atol(header->rdevminor, sizeof(header->rdevminor))); 2892 } 2893 } else { 2894 archive_entry_set_rdev(entry, 0); 2895 } 2896 2897 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 2898 2899 /* Grab GNU-specific fields. 
*/ 2900 if (!archive_entry_atime_is_set(entry)) { 2901 t = tar_atol(header->atime, sizeof(header->atime)); 2902 if (t > 0) 2903 archive_entry_set_atime(entry, t, 0); 2904 } 2905 if (!archive_entry_ctime_is_set(entry)) { 2906 t = tar_atol(header->ctime, sizeof(header->ctime)); 2907 if (t > 0) 2908 archive_entry_set_ctime(entry, t, 0); 2909 } 2910 2911 if (header->realsize[0] != 0) { 2912 tar->realsize 2913 = tar_atol(header->realsize, sizeof(header->realsize)); 2914 archive_entry_set_size(entry, tar->realsize); 2915 tar->realsize_override = 1; 2916 } 2917 2918 if (header->sparse[0].offset[0] != 0) { 2919 if (gnu_sparse_old_read(a, tar, header, unconsumed) 2920 != ARCHIVE_OK) 2921 return (ARCHIVE_FATAL); 2922 } else { 2923 if (header->isextended[0] != 0) { 2924 /* XXX WTF? XXX */ 2925 } 2926 } 2927 2928 return (err); 2929 } 2930 2931 static int 2932 gnu_add_sparse_entry(struct archive_read *a, struct tar *tar, 2933 int64_t offset, int64_t remaining) 2934 { 2935 struct sparse_block *p; 2936 2937 p = calloc(1, sizeof(*p)); 2938 if (p == NULL) { 2939 archive_set_error(&a->archive, ENOMEM, "Out of memory"); 2940 return (ARCHIVE_FATAL); 2941 } 2942 if (tar->sparse_last != NULL) 2943 tar->sparse_last->next = p; 2944 else 2945 tar->sparse_list = p; 2946 tar->sparse_last = p; 2947 if (remaining < 0 || offset < 0 || offset > INT64_MAX - remaining) { 2948 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed sparse map data"); 2949 return (ARCHIVE_FATAL); 2950 } 2951 p->offset = offset; 2952 p->remaining = remaining; 2953 return (ARCHIVE_OK); 2954 } 2955 2956 static void 2957 gnu_clear_sparse_list(struct tar *tar) 2958 { 2959 struct sparse_block *p; 2960 2961 while (tar->sparse_list != NULL) { 2962 p = tar->sparse_list; 2963 tar->sparse_list = p->next; 2964 free(p); 2965 } 2966 tar->sparse_last = NULL; 2967 } 2968 2969 /* 2970 * GNU tar old-format sparse data. 2971 * 2972 * GNU old-format sparse data is stored in a fixed-field 2973 * format. 
Offset/size values are 11-byte octal fields (same 2974 * format as 'size' field in ustart header). These are 2975 * stored in the header, allocating subsequent header blocks 2976 * as needed. Extending the header in this way is a pretty 2977 * severe POSIX violation; this design has earned GNU tar a 2978 * lot of criticism. 2979 */ 2980 2981 static int 2982 gnu_sparse_old_read(struct archive_read *a, struct tar *tar, 2983 const struct archive_entry_header_gnutar *header, size_t *unconsumed) 2984 { 2985 ssize_t bytes_read; 2986 const void *data; 2987 struct extended { 2988 struct gnu_sparse sparse[21]; 2989 char isextended[1]; 2990 char padding[7]; 2991 }; 2992 const struct extended *ext; 2993 2994 if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK) 2995 return (ARCHIVE_FATAL); 2996 if (header->isextended[0] == 0) 2997 return (ARCHIVE_OK); 2998 2999 do { 3000 tar_flush_unconsumed(a, unconsumed); 3001 data = __archive_read_ahead(a, 512, &bytes_read); 3002 if (data == NULL) { 3003 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 3004 "Truncated tar archive " 3005 "detected while reading sparse file data"); 3006 return (ARCHIVE_FATAL); 3007 } 3008 *unconsumed = 512; 3009 ext = (const struct extended *)data; 3010 if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK) 3011 return (ARCHIVE_FATAL); 3012 } while (ext->isextended[0] != 0); 3013 if (tar->sparse_list != NULL) 3014 tar->entry_offset = tar->sparse_list->offset; 3015 return (ARCHIVE_OK); 3016 } 3017 3018 static int 3019 gnu_sparse_old_parse(struct archive_read *a, struct tar *tar, 3020 const struct gnu_sparse *sparse, int length) 3021 { 3022 while (length > 0 && sparse->offset[0] != 0) { 3023 if (gnu_add_sparse_entry(a, tar, 3024 tar_atol(sparse->offset, sizeof(sparse->offset)), 3025 tar_atol(sparse->numbytes, sizeof(sparse->numbytes))) 3026 != ARCHIVE_OK) 3027 return (ARCHIVE_FATAL); 3028 sparse++; 3029 length--; 3030 } 3031 return (ARCHIVE_OK); 3032 } 3033 3034 /* 3035 * GNU 
 * tar sparse format 0.0
 *
 * Beginning with GNU tar 1.15, sparse files are stored using
 * information in the pax extended header.  The GNU tar maintainers
 * have gone through a number of variations in the process of working
 * out this scheme; fortunately, they're all numbered.
 *
 * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the
 * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to
 * store offset/size for each block.  The repeated instances of these
 * latter fields violate the pax specification (which frowns on
 * duplicate keys), so this format was quickly replaced.
 */

/*
 * GNU tar sparse format 0.1
 *
 * This version replaced the offset/numbytes attributes with
 * a single "map" attribute that stored a list of integers.  This
 * format had two problems: First, the "map" attribute could be very
 * long, which caused problems for some implementations.  More
 * importantly, the sparse data was lost when extracted by archivers
 * that didn't recognize this extension.
3058 */ 3059 static int 3060 gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p, size_t length) 3061 { 3062 const char *e; 3063 int64_t offset = -1, size = -1; 3064 3065 for (;;) { 3066 e = p; 3067 while (length > 0 && *e != ',') { 3068 if (*e < '0' || *e > '9') 3069 return (ARCHIVE_WARN); 3070 e++; 3071 length--; 3072 } 3073 if (offset < 0) { 3074 offset = tar_atol10(p, e - p); 3075 if (offset < 0) 3076 return (ARCHIVE_WARN); 3077 } else { 3078 size = tar_atol10(p, e - p); 3079 if (size < 0) 3080 return (ARCHIVE_WARN); 3081 if (gnu_add_sparse_entry(a, tar, offset, size) 3082 != ARCHIVE_OK) 3083 return (ARCHIVE_FATAL); 3084 offset = -1; 3085 } 3086 if (length == 0) 3087 return (ARCHIVE_OK); 3088 p = e + 1; 3089 length--; 3090 } 3091 } 3092 3093 /* 3094 * GNU tar sparse format 1.0 3095 * 3096 * The idea: The offset/size data is stored as a series of base-10 3097 * ASCII numbers prepended to the file data, so that dearchivers that 3098 * don't support this format will extract the block map along with the 3099 * data and a separate post-process can restore the sparseness. 3100 * 3101 * Unfortunately, GNU tar 1.16 had a bug that added unnecessary 3102 * padding to the body of the file when using this format. GNU tar 3103 * 1.17 corrected this bug without bumping the version number, so 3104 * it's not possible to support both variants. This code supports 3105 * the later variant at the expense of not supporting the former. 3106 * 3107 * This variant also replaced GNU.sparse.size with GNU.sparse.realsize 3108 * and introduced the GNU.sparse.major/GNU.sparse.minor attributes. 3109 */ 3110 3111 /* 3112 * Read the next line from the input, and parse it as a decimal 3113 * integer followed by '\n'. Returns positive integer value or 3114 * negative on error. 
3115 */ 3116 static int64_t 3117 gnu_sparse_10_atol(struct archive_read *a, struct tar *tar, 3118 int64_t *remaining, size_t *unconsumed) 3119 { 3120 int64_t l, limit, last_digit_limit; 3121 const char *p; 3122 ssize_t bytes_read; 3123 int base, digit; 3124 3125 base = 10; 3126 limit = INT64_MAX / base; 3127 last_digit_limit = INT64_MAX % base; 3128 3129 /* 3130 * Skip any lines starting with '#'; GNU tar specs 3131 * don't require this, but they should. 3132 */ 3133 do { 3134 bytes_read = readline(a, tar, &p, 3135 (ssize_t)tar_min(*remaining, 100), unconsumed); 3136 if (bytes_read <= 0) 3137 return (ARCHIVE_FATAL); 3138 *remaining -= bytes_read; 3139 } while (p[0] == '#'); 3140 3141 l = 0; 3142 while (bytes_read > 0) { 3143 if (*p == '\n') 3144 return (l); 3145 if (*p < '0' || *p >= '0' + base) 3146 return (ARCHIVE_WARN); 3147 digit = *p - '0'; 3148 if (l > limit || (l == limit && digit > last_digit_limit)) 3149 l = INT64_MAX; /* Truncate on overflow. */ 3150 else 3151 l = (l * base) + digit; 3152 p++; 3153 bytes_read--; 3154 } 3155 /* TODO: Error message. */ 3156 return (ARCHIVE_WARN); 3157 } 3158 3159 /* 3160 * Returns length (in bytes) of the sparse data description 3161 * that was read. 3162 */ 3163 static ssize_t 3164 gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed) 3165 { 3166 ssize_t bytes_read; 3167 int entries; 3168 int64_t offset, size, to_skip, remaining; 3169 3170 /* Clear out the existing sparse list. */ 3171 gnu_clear_sparse_list(tar); 3172 3173 remaining = tar->entry_bytes_remaining; 3174 3175 /* Parse entries. */ 3176 entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed); 3177 if (entries < 0) 3178 return (ARCHIVE_FATAL); 3179 /* Parse the individual entries. 
*/ 3180 while (entries-- > 0) { 3181 /* Parse offset/size */ 3182 offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); 3183 if (offset < 0) 3184 return (ARCHIVE_FATAL); 3185 size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); 3186 if (size < 0) 3187 return (ARCHIVE_FATAL); 3188 /* Add a new sparse entry. */ 3189 if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK) 3190 return (ARCHIVE_FATAL); 3191 } 3192 /* Skip rest of block... */ 3193 tar_flush_unconsumed(a, unconsumed); 3194 bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining); 3195 to_skip = 0x1ff & -bytes_read; 3196 /* Fail if tar->entry_bytes_remaing would get negative */ 3197 if (to_skip > remaining) 3198 return (ARCHIVE_FATAL); 3199 if (to_skip != __archive_read_consume(a, to_skip)) 3200 return (ARCHIVE_FATAL); 3201 return ((ssize_t)(bytes_read + to_skip)); 3202 } 3203 3204 /* 3205 * Solaris pax extension for a sparse file. This is recorded with the 3206 * data and hole pairs. The way recording sparse information by Solaris' 3207 * pax simply indicates where data and sparse are, so the stored contents 3208 * consist of both data and hole. 
3209 */ 3210 static int 3211 pax_attribute_SUN_holesdata(struct archive_read *a, struct tar *tar, 3212 struct archive_entry *entry, const char *p, size_t length) 3213 { 3214 const char *e; 3215 int64_t start, end; 3216 int hole = 1; 3217 3218 (void)entry; /* UNUSED */ 3219 3220 end = 0; 3221 if (length <= 0) 3222 return (ARCHIVE_WARN); 3223 if (*p == ' ') { 3224 p++; 3225 length--; 3226 } else { 3227 return (ARCHIVE_WARN); 3228 } 3229 for (;;) { 3230 e = p; 3231 while (length > 0 && *e != ' ') { 3232 if (*e < '0' || *e > '9') 3233 return (ARCHIVE_WARN); 3234 e++; 3235 length--; 3236 } 3237 start = end; 3238 end = tar_atol10(p, e - p); 3239 if (end < 0) 3240 return (ARCHIVE_WARN); 3241 if (start < end) { 3242 if (gnu_add_sparse_entry(a, tar, start, 3243 end - start) != ARCHIVE_OK) 3244 return (ARCHIVE_FATAL); 3245 tar->sparse_last->hole = hole; 3246 } 3247 if (length == 0 || *e == '\n') { 3248 if (length == 0 && *e == '\n') { 3249 return (ARCHIVE_OK); 3250 } else { 3251 return (ARCHIVE_WARN); 3252 } 3253 } 3254 p = e + 1; 3255 length--; 3256 hole = hole == 0; 3257 } 3258 } 3259 3260 /*- 3261 * Convert text->integer. 3262 * 3263 * Traditional tar formats (including POSIX) specify base-8 for 3264 * all of the standard numeric fields. This is a significant limitation 3265 * in practice: 3266 * = file size is limited to 8GB 3267 * = rdevmajor and rdevminor are limited to 21 bits 3268 * = uid/gid are limited to 21 bits 3269 * 3270 * There are two workarounds for this: 3271 * = pax extended headers, which use variable-length string fields 3272 * = GNU tar and STAR both allow either base-8 or base-256 in 3273 * most fields. The high bit is set to indicate base-256. 3274 * 3275 * On read, this implementation supports both extensions. 3276 */ 3277 static int64_t 3278 tar_atol(const char *p, size_t char_cnt) 3279 { 3280 /* 3281 * Technically, GNU tar considers a field to be in base-256 3282 * only if the first byte is 0xff or 0x80. 
3283 */ 3284 if (*p & 0x80) 3285 return (tar_atol256(p, char_cnt)); 3286 return (tar_atol8(p, char_cnt)); 3287 } 3288 3289 /* 3290 * Note that this implementation does not (and should not!) obey 3291 * locale settings; you cannot simply substitute strtol here, since 3292 * it does obey locale. 3293 */ 3294 static int64_t 3295 tar_atol_base_n(const char *p, size_t char_cnt, int base) 3296 { 3297 int64_t l, maxval, limit, last_digit_limit; 3298 int digit, sign; 3299 3300 maxval = INT64_MAX; 3301 limit = INT64_MAX / base; 3302 last_digit_limit = INT64_MAX % base; 3303 3304 /* the pointer will not be dereferenced if char_cnt is zero 3305 * due to the way the && operator is evaluated. 3306 */ 3307 while (char_cnt != 0 && (*p == ' ' || *p == '\t')) { 3308 p++; 3309 char_cnt--; 3310 } 3311 3312 sign = 1; 3313 if (char_cnt != 0 && *p == '-') { 3314 sign = -1; 3315 p++; 3316 char_cnt--; 3317 3318 maxval = INT64_MIN; 3319 limit = -(INT64_MIN / base); 3320 last_digit_limit = -(INT64_MIN % base); 3321 } 3322 3323 l = 0; 3324 if (char_cnt != 0) { 3325 digit = *p - '0'; 3326 while (digit >= 0 && digit < base && char_cnt != 0) { 3327 if (l>limit || (l == limit && digit >= last_digit_limit)) { 3328 return maxval; /* Truncate on overflow. */ 3329 } 3330 l = (l * base) + digit; 3331 digit = *++p - '0'; 3332 char_cnt--; 3333 } 3334 } 3335 return (sign < 0) ? -l : l; 3336 } 3337 3338 static int64_t 3339 tar_atol8(const char *p, size_t char_cnt) 3340 { 3341 return tar_atol_base_n(p, char_cnt, 8); 3342 } 3343 3344 static int64_t 3345 tar_atol10(const char *p, size_t char_cnt) 3346 { 3347 return tar_atol_base_n(p, char_cnt, 10); 3348 } 3349 3350 /* 3351 * Parse a base-256 integer. This is just a variable-length 3352 * twos-complement signed binary value in big-endian order, except 3353 * that the high-order bit is ignored. The values here can be up to 3354 * 12 bytes, so we need to be careful about overflowing 64-bit 3355 * (8-byte) integers. 
3356 * 3357 * This code unashamedly assumes that the local machine uses 8-bit 3358 * bytes and twos-complement arithmetic. 3359 */ 3360 static int64_t 3361 tar_atol256(const char *_p, size_t char_cnt) 3362 { 3363 uint64_t l; 3364 const unsigned char *p = (const unsigned char *)_p; 3365 unsigned char c, neg; 3366 3367 /* Extend 7-bit 2s-comp to 8-bit 2s-comp, decide sign. */ 3368 c = *p; 3369 if (c & 0x40) { 3370 neg = 0xff; 3371 c |= 0x80; 3372 l = ~ARCHIVE_LITERAL_ULL(0); 3373 } else { 3374 neg = 0; 3375 c &= 0x7f; 3376 l = 0; 3377 } 3378 3379 /* If more than 8 bytes, check that we can ignore 3380 * high-order bits without overflow. */ 3381 while (char_cnt > sizeof(int64_t)) { 3382 --char_cnt; 3383 if (c != neg) 3384 return neg ? INT64_MIN : INT64_MAX; 3385 c = *++p; 3386 } 3387 3388 /* c is first byte that fits; if sign mismatch, return overflow */ 3389 if ((c ^ neg) & 0x80) { 3390 return neg ? INT64_MIN : INT64_MAX; 3391 } 3392 3393 /* Accumulate remaining bytes. */ 3394 while (--char_cnt > 0) { 3395 l = (l << 8) | c; 3396 c = *++p; 3397 } 3398 l = (l << 8) | c; 3399 /* Return signed twos-complement value. */ 3400 return (int64_t)(l); 3401 } 3402 3403 /* 3404 * Returns length of line (including trailing newline) 3405 * or negative on error. 'start' argument is updated to 3406 * point to first character of line. This avoids copying 3407 * when possible. 3408 */ 3409 static ssize_t 3410 readline(struct archive_read *a, struct tar *tar, const char **start, 3411 ssize_t limit, size_t *unconsumed) 3412 { 3413 ssize_t bytes_read; 3414 ssize_t total_size = 0; 3415 const void *t; 3416 const char *s; 3417 void *p; 3418 3419 tar_flush_unconsumed(a, unconsumed); 3420 3421 t = __archive_read_ahead(a, 1, &bytes_read); 3422 if (bytes_read <= 0 || t == NULL) 3423 return (ARCHIVE_FATAL); 3424 s = t; /* Start of line? */ 3425 p = memchr(t, '\n', bytes_read); 3426 /* If we found '\n' in the read buffer, return pointer to that. 
*/ 3427 if (p != NULL) { 3428 bytes_read = 1 + ((const char *)p) - s; 3429 if (bytes_read > limit) { 3430 archive_set_error(&a->archive, 3431 ARCHIVE_ERRNO_FILE_FORMAT, 3432 "Line too long"); 3433 return (ARCHIVE_FATAL); 3434 } 3435 *unconsumed = bytes_read; 3436 *start = s; 3437 return (bytes_read); 3438 } 3439 *unconsumed = bytes_read; 3440 /* Otherwise, we need to accumulate in a line buffer. */ 3441 for (;;) { 3442 if (total_size + bytes_read > limit) { 3443 archive_set_error(&a->archive, 3444 ARCHIVE_ERRNO_FILE_FORMAT, 3445 "Line too long"); 3446 return (ARCHIVE_FATAL); 3447 } 3448 if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) { 3449 archive_set_error(&a->archive, ENOMEM, 3450 "Can't allocate working buffer"); 3451 return (ARCHIVE_FATAL); 3452 } 3453 memcpy(tar->line.s + total_size, t, bytes_read); 3454 tar_flush_unconsumed(a, unconsumed); 3455 total_size += bytes_read; 3456 /* If we found '\n', clean up and return. */ 3457 if (p != NULL) { 3458 *start = tar->line.s; 3459 return (total_size); 3460 } 3461 /* Read some more. */ 3462 t = __archive_read_ahead(a, 1, &bytes_read); 3463 if (bytes_read <= 0 || t == NULL) 3464 return (ARCHIVE_FATAL); 3465 s = t; /* Start of line? */ 3466 p = memchr(t, '\n', bytes_read); 3467 /* If we found '\n', trim the read. */ 3468 if (p != NULL) { 3469 bytes_read = 1 + ((const char *)p) - s; 3470 } 3471 *unconsumed = bytes_read; 3472 } 3473 } 3474 3475 /* 3476 * base64_decode - Base64 decode 3477 * 3478 * This accepts most variations of base-64 encoding, including: 3479 * * with or without line breaks 3480 * * with or without the final group padded with '=' or '_' characters 3481 * (The most economical Base-64 variant does not pad the last group and 3482 * omits line breaks; RFC1341 used for MIME requires both.) 
/*
 * base64_decode() decodes 'len' bytes at 's'.
 *
 * Characters outside the base-64 alphabet (including line breaks) are
 * skipped.  A '=' or '_' ends decoding.  A final group of two or three
 * characters yields one or two bytes; a final group of a single
 * character yields nothing.
 *
 * Returns a malloc()ed buffer the caller must free(), with the number
 * of decoded bytes stored in *out_len.  The buffer is not
 * NUL-terminated.  On allocation failure returns NULL and sets
 * *out_len to 0.
 */
static char *
base64_decode(const char *s, size_t len, size_t *out_len)
{
	static const unsigned char digits[64] = {
		'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
		'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b',
		'c','d','e','f','g','h','i','j','k','l','m','n','o','p',
		'q','r','s','t','u','v','w','x','y','z','0','1','2','3',
		'4','5','6','7','8','9','+','/' };
	/*
	 * The reverse table is rebuilt on the stack for every call.  A
	 * lazily filled static table would be shared between threads, and
	 * a second caller could observe it half-initialized and silently
	 * drop valid characters.
	 */
	unsigned char decode_table[128];
	char *out, *d;
	const unsigned char *src = (const unsigned char *)s;
	unsigned i;

	memset(decode_table, 0xff, sizeof(decode_table));
	for (i = 0; i < sizeof(digits); i++)
		decode_table[digits[i]] = (unsigned char)i;

	/* Allocate enough space to hold the entire output. */
	/* Note that we may not use all of this... */
	out = malloc(len - len / 4 + 1);
	if (out == NULL) {
		*out_len = 0;
		return (NULL);
	}
	d = out;

	while (len > 0) {
		/* Collect the next group of (up to) four characters. */
		int v = 0;
		int group_size = 0;
		while (group_size < 4 && len > 0) {
			/* '=' or '_' padding indicates final group. */
			if (*src == '=' || *src == '_') {
				len = 0;
				break;
			}
			/* Skip illegal characters (including line breaks) */
			if (*src > 127 || *src < 32
			    || decode_table[*src] == 0xff) {
				len--;
				src++;
				continue;
			}
			v <<= 6;
			v |= decode_table[*src++];
			len --;
			group_size++;
		}
		/* Align a short group properly. */
		v <<= 6 * (4 - group_size);
		/* Unpack the group we just collected. */
		switch (group_size) {
		case 4: d[2] = v & 0xff;
			/* FALLTHROUGH */
		case 3: d[1] = (v >> 8) & 0xff;
			/* FALLTHROUGH */
		case 2: d[0] = (v >> 16) & 0xff;
			break;
		case 1: /* this is invalid! */
			break;
		}
		d += group_size * 3 / 4;
	}

	*out_len = d - out;
	return (out);
}

/*
 * Return the value (0..15) of hexadecimal digit 'c', accepting both
 * upper and lower case, or -1 if 'c' is not a hexadecimal digit.
 */
static int
tohex(int c)
{
	if (c >= '0' && c <= '9')
		return (c - '0');
	else if (c >= 'A' && c <= 'F')
		return (c - 'A' + 10);
	else if (c >= 'a' && c <= 'f')
		return (c - 'a' + 10);
	else
		return (-1);
}

/*
 * Decode %XX escapes in the first 'length' bytes of 'in', stopping
 * early at a NUL byte.  A '%' that is not followed by two hexadecimal
 * digits within the counted bytes is copied through unchanged.
 *
 * Returns a NUL-terminated malloc()ed string the caller must free(),
 * or NULL on allocation failure.
 */
static char *
url_decode(const char *in, size_t length)
{
	char *out, *d;
	const char *s;

	out = malloc(length + 1);
	if (out == NULL)
		return (NULL);
	for (s = in, d = out; length > 0 && *s != '\0'; ) {
		if (s[0] == '%' && length > 2) {
			/* Try to convert % escape */
			int digit1 = tohex(s[1]);
			int digit2 = tohex(s[2]);
			if (digit1 >= 0 && digit2 >= 0) {
				/* Looks good, consume three chars */
				s += 3;
				length -= 3;
				/* Convert output */
				*d++ = ((digit1 << 4) | digit2);
				continue;
			}
			/* Else fall through and treat '%' as normal char */
		}
		*d++ = *s++;
		--length;
	}
	*d = '\0';
	return (out);
}