1### the gnu tar specification: 2### http://www.gnu.org/software/tar/manual/tar.html 3### 4### and the pax format spec, which tar derives from: 5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html 6 7package Archive::Tar; 8require 5.005_03; 9 10use Cwd; 11use IO::Zlib; 12use IO::File; 13use Carp qw(carp croak); 14use File::Spec (); 15use File::Spec::Unix (); 16use File::Path (); 17 18use Archive::Tar::File; 19use Archive::Tar::Constant; 20 21require Exporter; 22 23use strict; 24use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD 25 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS 26 $INSECURE_EXTRACT_MODE @ISA @EXPORT 27 ]; 28 29@ISA = qw[Exporter]; 30@EXPORT = qw[ COMPRESS_GZIP COMPRESS_BZIP ]; 31$DEBUG = 0; 32$WARN = 1; 33$FOLLOW_SYMLINK = 0; 34$VERSION = "1.54"; 35$CHOWN = 1; 36$CHMOD = 1; 37$SAME_PERMISSIONS = $> == 0 ? 1 : 0; 38$DO_NOT_USE_PREFIX = 0; 39$INSECURE_EXTRACT_MODE = 0; 40 41BEGIN { 42 use Config; 43 $HAS_PERLIO = $Config::Config{useperlio}; 44 45 ### try and load IO::String anyway, so you can dynamically 46 ### switch between perlio and IO::String 47 $HAS_IO_STRING = eval { 48 require IO::String; 49 import IO::String; 50 1; 51 } || 0; 52} 53 54=head1 NAME 55 56Archive::Tar - module for manipulations of tar archives 57 58=head1 SYNOPSIS 59 60 use Archive::Tar; 61 my $tar = Archive::Tar->new; 62 63 $tar->read('origin.tgz'); 64 $tar->extract(); 65 66 $tar->add_files('file/foo.pl', 'docs/README'); 67 $tar->add_data('file/baz.txt', 'This is the contents now'); 68 69 $tar->rename('oldname', 'new/file/name'); 70 71 $tar->write('files.tar'); # plain tar 72 $tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed 73 $tar->write('files.tbz', COMPRESS_BZIP); # bzip2 compressed 74 75=head1 DESCRIPTION 76 77Archive::Tar provides an object oriented mechanism for handling tar 78files. It provides class methods for quick and easy files handling 79while also allowing for the creation of tar file objects for custom 80manipulation. If you have the IO::Zlib module installed, 81Archive::Tar will also support compressed or gzipped tar files. 82 83An object of class Archive::Tar represents a .tar(.gz) archive full 84of files and things. 85 86=head1 Object Methods 87 88=head2 Archive::Tar->new( [$file, $compressed] ) 89 90Returns a new Tar object. If given any arguments, C<new()> calls the 91C<read()> method automatically, passing on the arguments provided to 92the C<read()> method. 93 94If C<new()> is invoked with arguments and the C<read()> method fails 95for any reason, C<new()> returns undef. 96 97=cut 98 99my $tmpl = { 100 _data => [ ], 101 _file => 'Unknown', 102}; 103 104### install get/set accessors for this object. 105for my $key ( keys %$tmpl ) { 106 no strict 'refs'; 107 *{__PACKAGE__."::$key"} = sub { 108 my $self = shift; 109 $self->{$key} = $_[0] if @_; 110 return $self->{$key}; 111 } 112} 113 114sub new { 115 my $class = shift; 116 $class = ref $class if ref $class; 117 118 ### copying $tmpl here since a shallow copy makes it use the 119 ### same aref, causing for files to remain in memory always. 120 my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class; 121 122 if (@_) { 123 unless ( $obj->read( @_ ) ) { 124 $obj->_error(qq[No data could be read from file]); 125 return; 126 } 127 } 128 129 return $obj; 130} 131 132=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] ) 133 134Read the given tar file into memory. 135The first argument can either be the name of a file or a reference to 136an already open filehandle (or an IO::Zlib object if it's compressed) 137 138The C<read> will I<replace> any previous content in C<$tar>! 139 140The second argument may be considered optional, but remains for 141backwards compatibility. Archive::Tar now looks at the file 142magic to determine what class should be used to open the file 143and will transparently Do The Right Thing. 144 145Archive::Tar will warn if you try to pass a bzip2 compressed file and the 146IO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return. 147 148Note that you can currently B<not> pass a C<gzip> compressed 149filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed 150filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string 151containing the full archive information (either compressed or 152uncompressed). These are worth while features, but not currently 153implemented. See the C<TODO> section. 154 155The third argument can be a hash reference with options. Note that 156all options are case-sensitive. 157 158=over 4 159 160=item limit 161 162Do not read more than C<limit> files. This is useful if you have 163very big archives, and are only interested in the first few files. 164 165=item filter 166 167Can be set to a regular expression. Only files with names that match 168the expression will be read. 169 170=item extract 171 172If set to true, immediately extract entries when reading them. This 173gives you the same memory break as the C<extract_archive> function. 174Note however that entries will not be read into memory, but written 175straight to disk. This means no C<Archive::Tar::File> objects are 176created for you to inspect. 177 178=back 179 180All files are stored internally as C<Archive::Tar::File> objects. 181Please consult the L<Archive::Tar::File> documentation for details. 182 183Returns the number of files read in scalar context, and a list of 184C<Archive::Tar::File> objects in list context. 185 186=cut 187 188sub read { 189 my $self = shift; 190 my $file = shift; 191 my $gzip = shift || 0; 192 my $opts = shift || {}; 193 194 unless( defined $file ) { 195 $self->_error( qq[No file to read from!] ); 196 return; 197 } else { 198 $self->_file( $file ); 199 } 200 201 my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) ) 202 or return; 203 204 my $data = $self->_read_tar( $handle, $opts ) or return; 205 206 $self->_data( $data ); 207 208 return wantarray ? @$data : scalar @$data; 209} 210 211sub _get_handle { 212 my $self = shift; 213 my $file = shift; return unless defined $file; 214 return $file if ref $file; 215 my $compress = shift || 0; 216 my $mode = shift || READ_ONLY->( ZLIB ); # default to read only 217 218 219 ### get a FH opened to the right class, so we can use it transparently 220 ### throughout the program 221 my $fh; 222 { ### reading magic only makes sense if we're opening a file for 223 ### reading. otherwise, just use what the user requested. 224 my $magic = ''; 225 if( MODE_READ->($mode) ) { 226 open my $tmp, $file or do { 227 $self->_error( qq[Could not open '$file' for reading: $!] ); 228 return; 229 }; 230 231 ### read the first 4 bites of the file to figure out which class to 232 ### use to open the file. 233 sysread( $tmp, $magic, 4 ); 234 close $tmp; 235 } 236 237 ### is it bzip? 238 ### if you asked specifically for bzip compression, or if we're in 239 ### read mode and the magic numbers add up, use bzip 240 if( BZIP and ( 241 ($compress eq COMPRESS_BZIP) or 242 ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM ) 243 ) 244 ) { 245 246 ### different reader/writer modules, different error vars... sigh 247 if( MODE_READ->($mode) ) { 248 $fh = IO::Uncompress::Bunzip2->new( $file ) or do { 249 $self->_error( qq[Could not read '$file': ] . 250 $IO::Uncompress::Bunzip2::Bunzip2Error 251 ); 252 return; 253 }; 254 255 } else { 256 $fh = IO::Compress::Bzip2->new( $file ) or do { 257 $self->_error( qq[Could not write to '$file': ] . 258 $IO::Compress::Bzip2::Bzip2Error 259 ); 260 return; 261 }; 262 } 263 264 ### is it gzip? 265 ### if you asked for compression, if you wanted to read or the gzip 266 ### magic number is present (redundant with read) 267 } elsif( ZLIB and ( 268 $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM 269 ) 270 ) { 271 $fh = IO::Zlib->new; 272 273 unless( $fh->open( $file, $mode ) ) { 274 $self->_error(qq[Could not create filehandle for '$file': $!]); 275 return; 276 } 277 278 ### is it plain tar? 279 } else { 280 $fh = IO::File->new; 281 282 unless( $fh->open( $file, $mode ) ) { 283 $self->_error(qq[Could not create filehandle for '$file': $!]); 284 return; 285 } 286 287 ### enable bin mode on tar archives 288 binmode $fh; 289 } 290 } 291 292 return $fh; 293} 294 295 296sub _read_tar { 297 my $self = shift; 298 my $handle = shift or return; 299 my $opts = shift || {}; 300 301 my $count = $opts->{limit} || 0; 302 my $filter = $opts->{filter}; 303 my $extract = $opts->{extract} || 0; 304 305 ### set a cap on the amount of files to extract ### 306 my $limit = 0; 307 $limit = 1 if $count > 0; 308 309 my $tarfile = [ ]; 310 my $chunk; 311 my $read = 0; 312 my $real_name; # to set the name of a file when 313 # we're encountering @longlink 314 my $data; 315 316 LOOP: 317 while( $handle->read( $chunk, HEAD ) ) { 318 ### IO::Zlib doesn't support this yet 319 my $offset = eval { tell $handle } || 'unknown'; 320 321 unless( $read++ ) { 322 my $gzip = GZIP_MAGIC_NUM; 323 if( $chunk =~ /$gzip/ ) { 324 $self->_error( qq[Cannot read compressed format in tar-mode] ); 325 return; 326 } 327 328 ### size is < HEAD, which means a corrupted file, as the minimum 329 ### length is _at least_ HEAD 330 if (length $chunk != HEAD) { 331 $self->_error( qq[Cannot read enough bytes from the tarfile] ); 332 return; 333 } 334 } 335 336 ### if we can't read in all bytes... ### 337 last if length $chunk != HEAD; 338 339 ### Apparently this should really be two blocks of 512 zeroes, 340 ### but GNU tar sometimes gets it wrong. See comment in the 341 ### source code (tar.c) to GNU cpio. 342 next if $chunk eq TAR_END; 343 344 ### according to the posix spec, the last 12 bytes of the header are 345 ### null bytes, to pad it to a 512 byte block. That means if these 346 ### bytes are NOT null bytes, it's a corrrupt header. See: 347 ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx 348 ### line 111 349 { my $nulls = join '', "\0" x 12; 350 unless( $nulls eq substr( $chunk, 500, 12 ) ) { 351 $self->_error( qq[Invalid header block at offset $offset] ); 352 next LOOP; 353 } 354 } 355 356 ### pass the realname, so we can set it 'proper' right away 357 ### some of the heuristics are done on the name, so important 358 ### to set it ASAP 359 my $entry; 360 { my %extra_args = (); 361 $extra_args{'name'} = $$real_name if defined $real_name; 362 363 unless( $entry = Archive::Tar::File->new( chunk => $chunk, 364 %extra_args ) 365 ) { 366 $self->_error( qq[Couldn't read chunk at offset $offset] ); 367 next LOOP; 368 } 369 } 370 371 ### ignore labels: 372 ### http://www.gnu.org/manual/tar/html_node/tar_139.html 373 next if $entry->is_label; 374 375 if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) { 376 377 if ( $entry->is_file && !$entry->validate ) { 378 ### sometimes the chunk is rather fux0r3d and a whole 512 379 ### bytes ends up in the ->name area. 380 ### clean it up, if need be 381 my $name = $entry->name; 382 $name = substr($name, 0, 100) if length $name > 100; 383 $name =~ s/\n/ /g; 384 385 $self->_error( $name . qq[: checksum error] ); 386 next LOOP; 387 } 388 389 my $block = BLOCK_SIZE->( $entry->size ); 390 391 $data = $entry->get_content_by_ref; 392 393 ### just read everything into memory 394 ### can't do lazy loading since IO::Zlib doesn't support 'seek' 395 ### this is because Compress::Zlib doesn't support it =/ 396 ### this reads in the whole data in one read() call. 397 if( $handle->read( $$data, $block ) < $block ) { 398 $self->_error( qq[Read error on tarfile (missing data) ']. 399 $entry->full_path ."' at offset $offset" ); 400 next LOOP; 401 } 402 403 ### throw away trailing garbage ### 404 substr ($$data, $entry->size) = "" if defined $$data; 405 406 ### part II of the @LongLink munging -- need to do /after/ 407 ### the checksum check. 408 if( $entry->is_longlink ) { 409 ### weird thing in tarfiles -- if the file is actually a 410 ### @LongLink, the data part seems to have a trailing ^@ 411 ### (unprintable) char. to display, pipe output through less. 412 ### but that doesn't *always* happen.. so check if the last 413 ### character is a control character, and if so remove it 414 ### at any rate, we better remove that character here, or tests 415 ### like 'eq' and hashlook ups based on names will SO not work 416 ### remove it by calculating the proper size, and then 417 ### tossing out everything that's longer than that size. 418 419 ### count number of nulls 420 my $nulls = $$data =~ tr/\0/\0/; 421 422 ### cut data + size by that many bytes 423 $entry->size( $entry->size - $nulls ); 424 substr ($$data, $entry->size) = ""; 425 } 426 } 427 428 ### clean up of the entries.. posix tar /apparently/ has some 429 ### weird 'feature' that allows for filenames > 255 characters 430 ### they'll put a header in with as name '././@LongLink' and the 431 ### contents will be the name of the /next/ file in the archive 432 ### pretty crappy and kludgy if you ask me 433 434 ### set the name for the next entry if this is a @LongLink; 435 ### this is one ugly hack =/ but needed for direct extraction 436 if( $entry->is_longlink ) { 437 $real_name = $data; 438 next LOOP; 439 } elsif ( defined $real_name ) { 440 $entry->name( $$real_name ); 441 $entry->prefix(''); 442 undef $real_name; 443 } 444 445 ### skip this entry if we're filtering 446 if ($filter && $entry->name !~ $filter) { 447 next LOOP; 448 449 ### skip this entry if it's a pax header. This is a special file added 450 ### by, among others, git-generated tarballs. It holds comments and is 451 ### not meant for extracting. See #38932: pax_global_header extracted 452 } elsif ( $entry->name eq PAX_HEADER ) { 453 next LOOP; 454 } 455 456 $self->_extract_file( $entry ) if $extract 457 && !$entry->is_longlink 458 && !$entry->is_unknown 459 && !$entry->is_label; 460 461 ### Guard against tarfiles with garbage at the end 462 last LOOP if $entry->name eq ''; 463 464 ### push only the name on the rv if we're extracting 465 ### -- for extract_archive 466 push @$tarfile, ($extract ? $entry->name : $entry); 467 468 if( $limit ) { 469 $count-- unless $entry->is_longlink || $entry->is_dir; 470 last LOOP unless $count; 471 } 472 } continue { 473 undef $data; 474 } 475 476 return $tarfile; 477} 478 479=head2 $tar->contains_file( $filename ) 480 481Check if the archive contains a certain file. 482It will return true if the file is in the archive, false otherwise. 483 484Note however, that this function does an exact match using C<eq> 485on the full path. So it cannot compensate for case-insensitive file- 486systems or compare 2 paths to see if they would point to the same 487underlying file. 488 489=cut 490 491sub contains_file { 492 my $self = shift; 493 my $full = shift; 494 495 return unless defined $full; 496 497 ### don't warn if the entry isn't there.. that's what this function 498 ### is for after all. 499 local $WARN = 0; 500 return 1 if $self->_find_entry($full); 501 return; 502} 503 504=head2 $tar->extract( [@filenames] ) 505 506Write files whose names are equivalent to any of the names in 507C<@filenames> to disk, creating subdirectories as necessary. This 508might not work too well under VMS. 509Under MacPerl, the file's modification time will be converted to the 510MacOS zero of time, and appropriate conversions will be done to the 511path. However, the length of each element of the path is not 512inspected to see whether it's longer than MacOS currently allows (32 513characters). 514 515If C<extract> is called without a list of file names, the entire 516contents of the archive are extracted. 517 518Returns a list of filenames extracted. 519 520=cut 521 522sub extract { 523 my $self = shift; 524 my @args = @_; 525 my @files; 526 527 # use the speed optimization for all extracted files 528 local($self->{cwd}) = cwd() unless $self->{cwd}; 529 530 ### you requested the extraction of only certian files 531 if( @args ) { 532 for my $file ( @args ) { 533 534 ### it's already an object? 535 if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) { 536 push @files, $file; 537 next; 538 539 ### go find it then 540 } else { 541 542 my $found; 543 for my $entry ( @{$self->_data} ) { 544 next unless $file eq $entry->full_path; 545 546 ### we found the file you're looking for 547 push @files, $entry; 548 $found++; 549 } 550 551 unless( $found ) { 552 return $self->_error( 553 qq[Could not find '$file' in archive] ); 554 } 555 } 556 } 557 558 ### just grab all the file items 559 } else { 560 @files = $self->get_files; 561 } 562 563 ### nothing found? that's an error 564 unless( scalar @files ) { 565 $self->_error( qq[No files found for ] . $self->_file ); 566 return; 567 } 568 569 ### now extract them 570 for my $entry ( @files ) { 571 unless( $self->_extract_file( $entry ) ) { 572 $self->_error(q[Could not extract ']. $entry->full_path .q['] ); 573 return; 574 } 575 } 576 577 return @files; 578} 579 580=head2 $tar->extract_file( $file, [$extract_path] ) 581 582Write an entry, whose name is equivalent to the file name provided to 583disk. Optionally takes a second parameter, which is the full native 584path (including filename) the entry will be written to. 585 586For example: 587 588 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' ); 589 590 $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' ); 591 592Returns true on success, false on failure. 593 594=cut 595 596sub extract_file { 597 my $self = shift; 598 my $file = shift; return unless defined $file; 599 my $alt = shift; 600 601 my $entry = $self->_find_entry( $file ) 602 or $self->_error( qq[Could not find an entry for '$file'] ), return; 603 604 return $self->_extract_file( $entry, $alt ); 605} 606 607sub _extract_file { 608 my $self = shift; 609 my $entry = shift or return; 610 my $alt = shift; 611 612 ### you wanted an alternate extraction location ### 613 my $name = defined $alt ? $alt : $entry->full_path; 614 615 ### splitpath takes a bool at the end to indicate 616 ### that it's splitting a dir 617 my ($vol,$dirs,$file); 618 if ( defined $alt ) { # It's a local-OS path 619 ($vol,$dirs,$file) = File::Spec->splitpath( $alt, 620 $entry->is_dir ); 621 } else { 622 ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name, 623 $entry->is_dir ); 624 } 625 626 my $dir; 627 ### is $name an absolute path? ### 628 if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) { 629 630 ### absolute names are not allowed to be in tarballs under 631 ### strict mode, so only allow it if a user tells us to do it 632 if( not defined $alt and not $INSECURE_EXTRACT_MODE ) { 633 $self->_error( 634 q[Entry ']. $entry->full_path .q[' is an absolute path. ]. 635 q[Not extracting absolute paths under SECURE EXTRACT MODE] 636 ); 637 return; 638 } 639 640 ### user asked us to, it's fine. 641 $dir = File::Spec->catpath( $vol, $dirs, "" ); 642 643 ### it's a relative path ### 644 } else { 645 my $cwd = (ref $self and defined $self->{cwd}) 646 ? $self->{cwd} 647 : cwd(); 648 649 my @dirs = defined $alt 650 ? File::Spec->splitdir( $dirs ) # It's a local-OS path 651 : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely 652 # straight from the tarball 653 654 if( not defined $alt and 655 not $INSECURE_EXTRACT_MODE 656 ) { 657 658 ### paths that leave the current directory are not allowed under 659 ### strict mode, so only allow it if a user tells us to do this. 660 if( grep { $_ eq '..' } @dirs ) { 661 662 $self->_error( 663 q[Entry ']. $entry->full_path .q[' is attempting to leave ]. 664 q[the current working directory. Not extracting under ]. 665 q[SECURE EXTRACT MODE] 666 ); 667 return; 668 } 669 670 ### the archive may be asking us to extract into a symlink. This 671 ### is not sane and a possible security issue, as outlined here: 672 ### https://rt.cpan.org/Ticket/Display.html?id=30380 673 ### https://bugzilla.redhat.com/show_bug.cgi?id=295021 674 ### https://issues.rpath.com/browse/RPL-1716 675 my $full_path = $cwd; 676 for my $d ( @dirs ) { 677 $full_path = File::Spec->catdir( $full_path, $d ); 678 679 ### we've already checked this one, and it's safe. Move on. 680 next if ref $self and $self->{_link_cache}->{$full_path}; 681 682 if( -l $full_path ) { 683 my $to = readlink $full_path; 684 my $diag = "symlinked directory ($full_path => $to)"; 685 686 $self->_error( 687 q[Entry ']. $entry->full_path .q[' is attempting to ]. 688 qq[extract to a $diag. This is considered a security ]. 689 q[vulnerability and not allowed under SECURE EXTRACT ]. 690 q[MODE] 691 ); 692 return; 693 } 694 695 ### XXX keep a cache if possible, so the stats become cheaper: 696 $self->{_link_cache}->{$full_path} = 1 if ref $self; 697 } 698 } 699 700 ### '.' is the directory delimiter on VMS, which has to be escaped 701 ### or changed to '_' on vms. vmsify is used, because older versions 702 ### of vmspath do not handle this properly. 703 ### Must not add a '/' to an empty directory though. 704 map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS; 705 706 my ($cwd_vol,$cwd_dir,$cwd_file) 707 = File::Spec->splitpath( $cwd ); 708 my @cwd = File::Spec->splitdir( $cwd_dir ); 709 push @cwd, $cwd_file if length $cwd_file; 710 711 ### We need to pass '' as the last elemant to catpath. Craig Berry 712 ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>): 713 ### The root problem is that splitpath on UNIX always returns the 714 ### final path element as a file even if it is a directory, and of 715 ### course there is no way it can know the difference without checking 716 ### against the filesystem, which it is documented as not doing. When 717 ### you turn around and call catpath, on VMS you have to know which bits 718 ### are directory bits and which bits are file bits. In this case we 719 ### know the result should be a directory. I had thought you could omit 720 ### the file argument to catpath in such a case, but apparently on UNIX 721 ### you can't. 722 $dir = File::Spec->catpath( 723 $cwd_vol, File::Spec->catdir( @cwd, @dirs ), '' 724 ); 725 726 ### catdir() returns undef if the path is longer than 255 chars on 727 ### older VMS systems. 728 unless ( defined $dir ) { 729 $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] ); 730 return; 731 } 732 733 } 734 735 if( -e $dir && !-d _ ) { 736 $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] ); 737 return; 738 } 739 740 unless ( -d _ ) { 741 eval { File::Path::mkpath( $dir, 0, 0777 ) }; 742 if( $@ ) { 743 my $fp = $entry->full_path; 744 $self->_error(qq[Could not create directory '$dir' for '$fp': $@]); 745 return; 746 } 747 748 ### XXX chown here? that might not be the same as in the archive 749 ### as we're only chown'ing to the owner of the file we're extracting 750 ### not to the owner of the directory itself, which may or may not 751 ### be another entry in the archive 752 ### Answer: no, gnu tar doesn't do it either, it'd be the wrong 753 ### way to go. 754 #if( $CHOWN && CAN_CHOWN ) { 755 # chown $entry->uid, $entry->gid, $dir or 756 # $self->_error( qq[Could not set uid/gid on '$dir'] ); 757 #} 758 } 759 760 ### we're done if we just needed to create a dir ### 761 return 1 if $entry->is_dir; 762 763 my $full = File::Spec->catfile( $dir, $file ); 764 765 if( $entry->is_unknown ) { 766 $self->_error( qq[Unknown file type for file '$full'] ); 767 return; 768 } 769 770 if( length $entry->type && $entry->is_file ) { 771 my $fh = IO::File->new; 772 $fh->open( '>' . $full ) or ( 773 $self->_error( qq[Could not open file '$full': $!] ), 774 return 775 ); 776 777 if( $entry->size ) { 778 binmode $fh; 779 syswrite $fh, $entry->data or ( 780 $self->_error( qq[Could not write data to '$full'] ), 781 return 782 ); 783 } 784 785 close $fh or ( 786 $self->_error( qq[Could not close file '$full'] ), 787 return 788 ); 789 790 } else { 791 $self->_make_special_file( $entry, $full ) or return; 792 } 793 794 ### only update the timestamp if it's not a symlink; that will change the 795 ### timestamp of the original. This addresses bug #33669: Could not update 796 ### timestamp warning on symlinks 797 if( not -l $full ) { 798 utime time, $entry->mtime - TIME_OFFSET, $full or 799 $self->_error( qq[Could not update timestamp] ); 800 } 801 802 if( $CHOWN && CAN_CHOWN->() ) { 803 chown $entry->uid, $entry->gid, $full or 804 $self->_error( qq[Could not set uid/gid on '$full'] ); 805 } 806 807 ### only chmod if we're allowed to, but never chmod symlinks, since they'll 808 ### change the perms on the file they're linking too... 809 if( $CHMOD and not -l $full ) { 810 my $mode = $entry->mode; 811 unless ($SAME_PERMISSIONS) { 812 $mode &= ~(oct(7000) | umask); 813 } 814 chmod $mode, $full or 815 $self->_error( qq[Could not chown '$full' to ] . $entry->mode ); 816 } 817 818 return 1; 819} 820 821sub _make_special_file { 822 my $self = shift; 823 my $entry = shift or return; 824 my $file = shift; return unless defined $file; 825 826 my $err; 827 828 if( $entry->is_symlink ) { 829 my $fail; 830 if( ON_UNIX ) { 831 symlink( $entry->linkname, $file ) or $fail++; 832 833 } else { 834 $self->_extract_special_file_as_plain_file( $entry, $file ) 835 or $fail++; 836 } 837 838 $err = qq[Making symbolic link '$file' to '] . 839 $entry->linkname .q[' failed] if $fail; 840 841 } elsif ( $entry->is_hardlink ) { 842 my $fail; 843 if( ON_UNIX ) { 844 link( $entry->linkname, $file ) or $fail++; 845 846 } else { 847 $self->_extract_special_file_as_plain_file( $entry, $file ) 848 or $fail++; 849 } 850 851 $err = qq[Making hard link from '] . $entry->linkname . 852 qq[' to '$file' failed] if $fail; 853 854 } elsif ( $entry->is_fifo ) { 855 ON_UNIX && !system('mknod', $file, 'p') or 856 $err = qq[Making fifo ']. $entry->name .qq[' failed]; 857 858 } elsif ( $entry->is_blockdev or $entry->is_chardev ) { 859 my $mode = $entry->is_blockdev ? 'b' : 'c'; 860 861 ON_UNIX && !system('mknod', $file, $mode, 862 $entry->devmajor, $entry->devminor) or 863 $err = qq[Making block device ']. $entry->name .qq[' (maj=] . 864 $entry->devmajor . qq[ min=] . $entry->devminor . 865 qq[) failed.]; 866 867 } elsif ( $entry->is_socket ) { 868 ### the original doesn't do anything special for sockets.... ### 869 1; 870 } 871 872 return $err ? $self->_error( $err ) : 1; 873} 874 875### don't know how to make symlinks, let's just extract the file as 876### a plain file 877sub _extract_special_file_as_plain_file { 878 my $self = shift; 879 my $entry = shift or return; 880 my $file = shift; return unless defined $file; 881 882 my $err; 883 TRY: { 884 my $orig = $self->_find_entry( $entry->linkname ); 885 886 unless( $orig ) { 887 $err = qq[Could not find file '] . $entry->linkname . 888 qq[' in memory.]; 889 last TRY; 890 } 891 892 ### clone the entry, make it appear as a normal file ### 893 my $clone = $entry->clone; 894 $clone->_downgrade_to_plainfile; 895 $self->_extract_file( $clone, $file ) or last TRY; 896 897 return 1; 898 } 899 900 return $self->_error($err); 901} 902 903=head2 $tar->list_files( [\@properties] ) 904 905Returns a list of the names of all the files in the archive. 906 907If C<list_files()> is passed an array reference as its first argument 908it returns a list of hash references containing the requested 909properties of each file. The following list of properties is 910supported: name, size, mtime (last modified date), mode, uid, gid, 911linkname, uname, gname, devmajor, devminor, prefix. 912 913Passing an array reference containing only one element, 'name', is 914special cased to return a list of names rather than a list of hash 915references, making it equivalent to calling C<list_files> without 916arguments. 917 918=cut 919 920sub list_files { 921 my $self = shift; 922 my $aref = shift || [ ]; 923 924 unless( $self->_data ) { 925 $self->read() or return; 926 } 927 928 if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) { 929 return map { $_->full_path } @{$self->_data}; 930 } else { 931 932 #my @rv; 933 #for my $obj ( @{$self->_data} ) { 934 # push @rv, { map { $_ => $obj->$_() } @$aref }; 935 #} 936 #return @rv; 937 938 ### this does the same as the above.. just needs a +{ } 939 ### to make sure perl doesn't confuse it for a block 940 return map { my $o=$_; 941 +{ map { $_ => $o->$_() } @$aref } 942 } @{$self->_data}; 943 } 944} 945 946sub _find_entry { 947 my $self = shift; 948 my $file = shift; 949 950 unless( defined $file ) { 951 $self->_error( qq[No file specified] ); 952 return; 953 } 954 955 ### it's an object already 956 return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' ); 957 958 for my $entry ( @{$self->_data} ) { 959 my $path = $entry->full_path; 960 return $entry if $path eq $file; 961 } 962 963 $self->_error( qq[No such file in archive: '$file'] ); 964 return; 965} 966 967=head2 $tar->get_files( [@filenames] ) 968 969Returns the C<Archive::Tar::File> objects matching the filenames 970provided. If no filename list was passed, all C<Archive::Tar::File> 971objects in the current Tar object are returned. 972 973Please refer to the C<Archive::Tar::File> documentation on how to 974handle these objects. 975 976=cut 977 978sub get_files { 979 my $self = shift; 980 981 return @{ $self->_data } unless @_; 982 983 my @list; 984 for my $file ( @_ ) { 985 push @list, grep { defined } $self->_find_entry( $file ); 986 } 987 988 return @list; 989} 990 991=head2 $tar->get_content( $file ) 992 993Return the content of the named file. 994 995=cut 996 997sub get_content { 998 my $self = shift; 999 my $entry = $self->_find_entry( shift ) or return; 1000 1001 return $entry->data; 1002} 1003 1004=head2 $tar->replace_content( $file, $content ) 1005 1006Make the string $content be the content for the file named $file. 1007 1008=cut 1009 1010sub replace_content { 1011 my $self = shift; 1012 my $entry = $self->_find_entry( shift ) or return; 1013 1014 return $entry->replace_content( shift ); 1015} 1016 1017=head2 $tar->rename( $file, $new_name ) 1018 1019Rename the file of the in-memory archive to $new_name. 1020 1021Note that you must specify a Unix path for $new_name, since per tar 1022standard, all files in the archive must be Unix paths. 1023 1024Returns true on success and false on failure. 1025 1026=cut 1027 1028sub rename { 1029 my $self = shift; 1030 my $file = shift; return unless defined $file; 1031 my $new = shift; return unless defined $new; 1032 1033 my $entry = $self->_find_entry( $file ) or return; 1034 1035 return $entry->rename( $new ); 1036} 1037 1038=head2 $tar->remove (@filenamelist) 1039 1040Removes any entries with names matching any of the given filenames 1041from the in-memory archive. Returns a list of C<Archive::Tar::File> 1042objects that remain. 1043 1044=cut 1045 1046sub remove { 1047 my $self = shift; 1048 my @list = @_; 1049 1050 my %seen = map { $_->full_path => $_ } @{$self->_data}; 1051 delete $seen{ $_ } for @list; 1052 1053 $self->_data( [values %seen] ); 1054 1055 return values %seen; 1056} 1057 1058=head2 $tar->clear 1059 1060C<clear> clears the current in-memory archive. This effectively gives 1061you a 'blank' object, ready to be filled again. Note that C<clear> 1062only has effect on the object, not the underlying tarfile. 1063 1064=cut 1065 1066sub clear { 1067 my $self = shift or return; 1068 1069 $self->_data( [] ); 1070 $self->_file( '' ); 1071 1072 return 1; 1073} 1074 1075 1076=head2 $tar->write ( [$file, $compressed, $prefix] ) 1077 1078Write the in-memory archive to disk. The first argument can either 1079be the name of a file or a reference to an already open filehandle (a 1080GLOB reference). 1081 1082The second argument is used to indicate compression. You can either 1083compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed 1084to be the C<gzip> compression level (between 1 and 9), but the use of 1085constants is prefered: 1086 1087 # write a gzip compressed file 1088 $tar->write( 'out.tgz', COMPRESS_GZIP ); 1089 1090 # write a bzip compressed file 1091 $tar->write( 'out.tbz', COMPRESS_BZIP ); 1092 1093Note that when you pass in a filehandle, the compression argument 1094is ignored, as all files are printed verbatim to your filehandle. 1095If you wish to enable compression with filehandles, use an 1096C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. 1097 1098The third argument is an optional prefix. All files will be tucked 1099away in the directory you specify as prefix. So if you have files 1100'a' and 'b' in your archive, and you specify 'foo' as prefix, they 1101will be written to the archive as 'foo/a' and 'foo/b'. 1102 1103If no arguments are given, C<write> returns the entire formatted 1104archive as a string, which could be useful if you'd like to stuff the 1105archive into a socket or a pipe to gzip or something. 1106 1107 1108=cut 1109 1110sub write { 1111 my $self = shift; 1112 my $file = shift; $file = '' unless defined $file; 1113 my $gzip = shift || 0; 1114 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1115 my $dummy = ''; 1116 1117 ### only need a handle if we have a file to print to ### 1118 my $handle = length($file) 1119 ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) ) 1120 or return ) 1121 : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h } 1122 : $HAS_IO_STRING ? IO::String->new 1123 : __PACKAGE__->no_string_support(); 1124 1125 ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a 1126 ### corrupt TAR file. Must clear out $\ to make sure no garbage is 1127 ### printed to the archive 1128 local $\; 1129 1130 for my $entry ( @{$self->_data} ) { 1131 ### entries to be written to the tarfile ### 1132 my @write_me; 1133 1134 ### only now will we change the object to reflect the current state 1135 ### of the name and prefix fields -- this needs to be limited to 1136 ### write() only! 1137 my $clone = $entry->clone; 1138 1139 1140 ### so, if you don't want use to use the prefix, we'll stuff 1141 ### everything in the name field instead 1142 if( $DO_NOT_USE_PREFIX ) { 1143 1144 ### you might have an extended prefix, if so, set it in the clone 1145 ### XXX is ::Unix right? 1146 $clone->name( length $ext_prefix 1147 ? File::Spec::Unix->catdir( $ext_prefix, 1148 $clone->full_path) 1149 : $clone->full_path ); 1150 $clone->prefix( '' ); 1151 1152 ### otherwise, we'll have to set it properly -- prefix part in the 1153 ### prefix and name part in the name field. 1154 } else { 1155 1156 ### split them here, not before! 1157 my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path ); 1158 1159 ### you might have an extended prefix, if so, set it in the clone 1160 ### XXX is ::Unix right? 1161 $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix ) 1162 if length $ext_prefix; 1163 1164 $clone->prefix( $prefix ); 1165 $clone->name( $name ); 1166 } 1167 1168 ### names are too long, and will get truncated if we don't add a 1169 ### '@LongLink' file... 1170 my $make_longlink = ( length($clone->name) > NAME_LENGTH or 1171 length($clone->prefix) > PREFIX_LENGTH 1172 ) || 0; 1173 1174 ### perhaps we need to make a longlink file? 1175 if( $make_longlink ) { 1176 my $longlink = Archive::Tar::File->new( 1177 data => LONGLINK_NAME, 1178 $clone->full_path, 1179 { type => LONGLINK } 1180 ); 1181 1182 unless( $longlink ) { 1183 $self->_error( qq[Could not create 'LongLink' entry for ] . 1184 qq[oversize file '] . $clone->full_path ."'" ); 1185 return; 1186 }; 1187 1188 push @write_me, $longlink; 1189 } 1190 1191 push @write_me, $clone; 1192 1193 ### write the one, optionally 2 a::t::file objects to the handle 1194 for my $clone (@write_me) { 1195 1196 ### if the file is a symlink, there are 2 options: 1197 ### either we leave the symlink intact, but then we don't write any 1198 ### data OR we follow the symlink, which means we actually make a 1199 ### copy. if we do the latter, we have to change the TYPE of the 1200 ### clone to 'FILE' 1201 my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK; 1202 my $data_ok = !$clone->is_symlink && $clone->has_content; 1203 1204 ### downgrade to a 'normal' file if it's a symlink we're going to 1205 ### treat as a regular file 1206 $clone->_downgrade_to_plainfile if $link_ok; 1207 1208 ### get the header for this block 1209 my $header = $self->_format_tar_entry( $clone ); 1210 unless( $header ) { 1211 $self->_error(q[Could not format header for: ] . 1212 $clone->full_path ); 1213 return; 1214 } 1215 1216 unless( print $handle $header ) { 1217 $self->_error(q[Could not write header for: ] . 1218 $clone->full_path); 1219 return; 1220 } 1221 1222 if( $link_ok or $data_ok ) { 1223 unless( print $handle $clone->data ) { 1224 $self->_error(q[Could not write data for: ] . 1225 $clone->full_path); 1226 return; 1227 } 1228 1229 ### pad the end of the clone if required ### 1230 print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK 1231 } 1232 1233 } ### done writing these entries 1234 } 1235 1236 ### write the end markers ### 1237 print $handle TAR_END x 2 or 1238 return $self->_error( qq[Could not write tar end markers] ); 1239 1240 ### did you want it written to a file, or returned as a string? ### 1241 my $rv = length($file) ? 1 1242 : $HAS_PERLIO ? $dummy 1243 : do { seek $handle, 0, 0; local $/; <$handle> }; 1244 1245 ### make sure to close the handle; 1246 close $handle; 1247 1248 return $rv; 1249} 1250 1251sub _format_tar_entry { 1252 my $self = shift; 1253 my $entry = shift or return; 1254 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1255 my $no_prefix = shift || 0; 1256 1257 my $file = $entry->name; 1258 my $prefix = $entry->prefix; $prefix = '' unless defined $prefix; 1259 1260 ### remove the prefix from the file name 1261 ### not sure if this is still neeeded --kane 1262 ### no it's not -- Archive::Tar::File->_new_from_file will take care of 1263 ### this for us. Even worse, this would break if we tried to add a file 1264 ### like x/x. 1265 #if( length $prefix ) { 1266 # $file =~ s/^$match//; 1267 #} 1268 1269 $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix) 1270 if length $ext_prefix; 1271 1272 ### not sure why this is... ### 1273 my $l = PREFIX_LENGTH; # is ambiguous otherwise... 1274 substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH; 1275 1276 my $f1 = "%06o"; my $f2 = "%11o"; 1277 1278 ### this might be optimizable with a 'changed' flag in the file objects ### 1279 my $tar = pack ( 1280 PACK, 1281 $file, 1282 1283 (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]), 1284 (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]), 1285 1286 "", # checksum field - space padded a bit down 1287 1288 (map { $entry->$_() } qw[type linkname magic]), 1289 1290 $entry->version || TAR_VERSION, 1291 1292 (map { $entry->$_() } qw[uname gname]), 1293 (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]), 1294 1295 ($no_prefix ? '' : $prefix) 1296 ); 1297 1298 ### add the checksum ### 1299 substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar)); 1300 1301 return $tar; 1302} 1303 1304=head2 $tar->add_files( @filenamelist ) 1305 1306Takes a list of filenames and adds them to the in-memory archive. 1307 1308The path to the file is automatically converted to a Unix like 1309equivalent for use in the archive, and, if on MacOS, the file's 1310modification time is converted from the MacOS epoch to the Unix epoch. 1311So tar archives created on MacOS with B<Archive::Tar> can be read 1312both with I<tar> on Unix and applications like I<suntar> or 1313I<Stuffit Expander> on MacOS. 1314 1315Be aware that the file's type/creator and resource fork will be lost, 1316which is usually what you want in cross-platform archives. 1317 1318Instead of a filename, you can also pass it an existing C<Archive::Tar::File> 1319object from, for example, another archive. The object will be clone, and 1320effectively be a copy of the original, not an alias. 1321 1322Returns a list of C<Archive::Tar::File> objects that were just added. 1323 1324=cut 1325 1326sub add_files { 1327 my $self = shift; 1328 my @files = @_ or return; 1329 1330 my @rv; 1331 for my $file ( @files ) { 1332 1333 ### you passed an Archive::Tar::File object 1334 ### clone it so we don't accidentally have a reference to 1335 ### an object from another archive 1336 if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) { 1337 push @rv, $file->clone; 1338 next; 1339 } 1340 1341 unless( -e $file || -l $file ) { 1342 $self->_error( qq[No such file: '$file'] ); 1343 next; 1344 } 1345 1346 my $obj = Archive::Tar::File->new( file => $file ); 1347 unless( $obj ) { 1348 $self->_error( qq[Unable to add file: '$file'] ); 1349 next; 1350 } 1351 1352 push @rv, $obj; 1353 } 1354 1355 push @{$self->{_data}}, @rv; 1356 1357 return @rv; 1358} 1359 1360=head2 $tar->add_data ( $filename, $data, [$opthashref] ) 1361 1362Takes a filename, a scalar full of data and optionally a reference to 1363a hash with specific options. 1364 1365Will add a file to the in-memory archive, with name C<$filename> and 1366content C<$data>. Specific properties can be set using C<$opthashref>. 1367The following list of properties is supported: name, size, mtime 1368(last modified date), mode, uid, gid, linkname, uname, gname, 1369devmajor, devminor, prefix, type. (On MacOS, the file's path and 1370modification times are converted to Unix equivalents.) 1371 1372Valid values for the file type are the following constants defined in 1373Archive::Tar::Constants: 1374 1375=over 4 1376 1377=item FILE 1378 1379Regular file. 1380 1381=item HARDLINK 1382 1383=item SYMLINK 1384 1385Hard and symbolic ("soft") links; linkname should specify target. 1386 1387=item CHARDEV 1388 1389=item BLOCKDEV 1390 1391Character and block devices. devmajor and devminor should specify the major 1392and minor device numbers. 1393 1394=item DIR 1395 1396Directory. 1397 1398=item FIFO 1399 1400FIFO (named pipe). 1401 1402=item SOCKET 1403 1404Socket. 1405 1406=back 1407 1408Returns the C<Archive::Tar::File> object that was just added, or 1409C<undef> on failure. 1410 1411=cut 1412 1413sub add_data { 1414 my $self = shift; 1415 my ($file, $data, $opt) = @_; 1416 1417 my $obj = Archive::Tar::File->new( data => $file, $data, $opt ); 1418 unless( $obj ) { 1419 $self->_error( qq[Unable to add file: '$file'] ); 1420 return; 1421 } 1422 1423 push @{$self->{_data}}, $obj; 1424 1425 return $obj; 1426} 1427 1428=head2 $tar->error( [$BOOL] ) 1429 1430Returns the current errorstring (usually, the last error reported). 1431If a true value was specified, it will give the C<Carp::longmess> 1432equivalent of the error, in effect giving you a stacktrace. 1433 1434For backwards compatibility, this error is also available as 1435C<$Archive::Tar::error> although it is much recommended you use the 1436method call instead. 1437 1438=cut 1439 1440{ 1441 $error = ''; 1442 my $longmess; 1443 1444 sub _error { 1445 my $self = shift; 1446 my $msg = $error = shift; 1447 $longmess = Carp::longmess($error); 1448 if (ref $self) { 1449 $self->{_error} = $error; 1450 $self->{_longmess} = $longmess; 1451 } 1452 1453 ### set Archive::Tar::WARN to 0 to disable printing 1454 ### of errors 1455 if( $WARN ) { 1456 carp $DEBUG ? $longmess : $msg; 1457 } 1458 1459 return; 1460 } 1461 1462 sub error { 1463 my $self = shift; 1464 if (ref $self) { 1465 return shift() ? $self->{_longmess} : $self->{_error}; 1466 } else { 1467 return shift() ? $longmess : $error; 1468 } 1469 } 1470} 1471 1472=head2 $tar->setcwd( $cwd ); 1473 1474C<Archive::Tar> needs to know the current directory, and it will run 1475C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the 1476tarfile and saves it in the file system. (As of version 1.30, however, 1477C<Archive::Tar> will use the speed optimization described below 1478automatically, so it's only relevant if you're using C<extract_file()>). 1479 1480Since C<Archive::Tar> doesn't change the current directory internally 1481while it is extracting the items in a tarball, all calls to C<Cwd::cwd()> 1482can be avoided if we can guarantee that the current directory doesn't 1483get changed externally. 1484 1485To use this performance boost, set the current directory via 1486 1487 use Cwd; 1488 $tar->setcwd( cwd() ); 1489 1490once before calling a function like C<extract_file> and 1491C<Archive::Tar> will use the current directory setting from then on 1492and won't call C<Cwd::cwd()> internally. 1493 1494To switch back to the default behaviour, use 1495 1496 $tar->setcwd( undef ); 1497 1498and C<Archive::Tar> will call C<Cwd::cwd()> internally again. 1499 1500If you're using C<Archive::Tar>'s C<exract()> method, C<setcwd()> will 1501be called for you. 1502 1503=cut 1504 1505sub setcwd { 1506 my $self = shift; 1507 my $cwd = shift; 1508 1509 $self->{cwd} = $cwd; 1510} 1511 1512=head1 Class Methods 1513 1514=head2 Archive::Tar->create_archive($file, $compressed, @filelist) 1515 1516Creates a tar file from the list of files provided. The first 1517argument can either be the name of the tar file to create or a 1518reference to an open file handle (e.g. a GLOB reference). 1519 1520The second argument is used to indicate compression. You can either 1521compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed 1522to be the C<gzip> compression level (between 1 and 9), but the use of 1523constants is prefered: 1524 1525 # write a gzip compressed file 1526 Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist ); 1527 1528 # write a bzip compressed file 1529 Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist ); 1530 1531Note that when you pass in a filehandle, the compression argument 1532is ignored, as all files are printed verbatim to your filehandle. 1533If you wish to enable compression with filehandles, use an 1534C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. 1535 1536The remaining arguments list the files to be included in the tar file. 1537These files must all exist. Any files which don't exist or can't be 1538read are silently ignored. 1539 1540If the archive creation fails for any reason, C<create_archive> will 1541return false. Please use the C<error> method to find the cause of the 1542failure. 1543 1544Note that this method does not write C<on the fly> as it were; it 1545still reads all the files into memory before writing out the archive. 1546Consult the FAQ below if this is a problem. 1547 1548=cut 1549 1550sub create_archive { 1551 my $class = shift; 1552 1553 my $file = shift; return unless defined $file; 1554 my $gzip = shift || 0; 1555 my @files = @_; 1556 1557 unless( @files ) { 1558 return $class->_error( qq[Cowardly refusing to create empty archive!] ); 1559 } 1560 1561 my $tar = $class->new; 1562 $tar->add_files( @files ); 1563 return $tar->write( $file, $gzip ); 1564} 1565 1566=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] ) 1567 1568Returns an iterator function that reads the tar file without loading 1569it all in memory. Each time the function is called it will return the 1570next file in the tarball. The files are returned as 1571C<Archive::Tar::File> objects. The iterator function returns the 1572empty list once it has exhausted the files contained. 1573 1574The second argument can be a hash reference with options, which are 1575identical to the arguments passed to C<read()>. 1576 1577Example usage: 1578 1579 my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} ); 1580 1581 while( my $f = $next->() ) { 1582 print $f->name, "\n"; 1583 1584 $f->extract or warn "Extraction failed"; 1585 1586 # .... 1587 } 1588 1589=cut 1590 1591 1592sub iter { 1593 my $class = shift; 1594 my $filename = shift or return; 1595 my $compressed = shift or 0; 1596 my $opts = shift || {}; 1597 1598 ### get a handle to read from. 1599 my $handle = $class->_get_handle( 1600 $filename, 1601 $compressed, 1602 READ_ONLY->( ZLIB ) 1603 ) or return; 1604 1605 my @data; 1606 return sub { 1607 return shift(@data) if @data; # more than one file returned? 1608 return unless $handle; # handle exhausted? 1609 1610 ### read data, should only return file 1611 my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 }); 1612 @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY'; 1613 1614 ### return one piece of data 1615 return shift(@data) if @data; 1616 1617 ### data is exhausted, free the filehandle 1618 undef $handle; 1619 return; 1620 }; 1621} 1622 1623=head2 Archive::Tar->list_archive($file, $compressed, [\@properties]) 1624 1625Returns a list of the names of all the files in the archive. The 1626first argument can either be the name of the tar file to list or a 1627reference to an open file handle (e.g. a GLOB reference). 1628 1629If C<list_archive()> is passed an array reference as its third 1630argument it returns a list of hash references containing the requested 1631properties of each file. The following list of properties is 1632supported: full_path, name, size, mtime (last modified date), mode, 1633uid, gid, linkname, uname, gname, devmajor, devminor, prefix. 1634 1635See C<Archive::Tar::File> for details about supported properties. 1636 1637Passing an array reference containing only one element, 'name', is 1638special cased to return a list of names rather than a list of hash 1639references. 1640 1641=cut 1642 1643sub list_archive { 1644 my $class = shift; 1645 my $file = shift; return unless defined $file; 1646 my $gzip = shift || 0; 1647 1648 my $tar = $class->new($file, $gzip); 1649 return unless $tar; 1650 1651 return $tar->list_files( @_ ); 1652} 1653 1654=head2 Archive::Tar->extract_archive($file, $compressed) 1655 1656Extracts the contents of the tar file. The first argument can either 1657be the name of the tar file to create or a reference to an open file 1658handle (e.g. a GLOB reference). All relative paths in the tar file will 1659be created underneath the current working directory. 1660 1661C<extract_archive> will return a list of files it extracted. 1662If the archive extraction fails for any reason, C<extract_archive> 1663will return false. Please use the C<error> method to find the cause 1664of the failure. 1665 1666=cut 1667 1668sub extract_archive { 1669 my $class = shift; 1670 my $file = shift; return unless defined $file; 1671 my $gzip = shift || 0; 1672 1673 my $tar = $class->new( ) or return; 1674 1675 return $tar->read( $file, $gzip, { extract => 1 } ); 1676} 1677 1678=head2 $bool = Archive::Tar->has_io_string 1679 1680Returns true if we currently have C<IO::String> support loaded. 1681 1682Either C<IO::String> or C<perlio> support is needed to support writing 1683stringified archives. Currently, C<perlio> is the preferred method, if 1684available. 1685 1686See the C<GLOBAL VARIABLES> section to see how to change this preference. 1687 1688=cut 1689 1690sub has_io_string { return $HAS_IO_STRING; } 1691 1692=head2 $bool = Archive::Tar->has_perlio 1693 1694Returns true if we currently have C<perlio> support loaded. 1695 1696This requires C<perl-5.8> or higher, compiled with C<perlio> 1697 1698Either C<IO::String> or C<perlio> support is needed to support writing 1699stringified archives. Currently, C<perlio> is the preferred method, if 1700available. 1701 1702See the C<GLOBAL VARIABLES> section to see how to change this preference. 1703 1704=cut 1705 1706sub has_perlio { return $HAS_PERLIO; } 1707 1708=head2 $bool = Archive::Tar->has_zlib_support 1709 1710Returns true if C<Archive::Tar> can extract C<zlib> compressed archives 1711 1712=cut 1713 1714sub has_zlib_support { return ZLIB } 1715 1716=head2 $bool = Archive::Tar->has_bzip2_support 1717 1718Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives 1719 1720=cut 1721 1722sub has_bzip2_support { return BZIP } 1723 1724=head2 Archive::Tar->can_handle_compressed_files 1725 1726A simple checking routine, which will return true if C<Archive::Tar> 1727is able to uncompress compressed archives on the fly with C<IO::Zlib> 1728and C<IO::Compress::Bzip2> or false if not both are installed. 1729 1730You can use this as a shortcut to determine whether C<Archive::Tar> 1731will do what you think before passing compressed archives to its 1732C<read> method. 1733 1734=cut 1735 1736sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 } 1737 1738sub no_string_support { 1739 croak("You have to install IO::String to support writing archives to strings"); 1740} 1741 17421; 1743 1744__END__ 1745 1746=head1 GLOBAL VARIABLES 1747 1748=head2 $Archive::Tar::FOLLOW_SYMLINK 1749 1750Set this variable to C<1> to make C<Archive::Tar> effectively make a 1751copy of the file when extracting. Default is C<0>, which 1752means the symlink stays intact. Of course, you will have to pack the 1753file linked to as well. 1754 1755This option is checked when you write out the tarfile using C<write> 1756or C<create_archive>. 1757 1758This works just like C</bin/tar>'s C<-h> option. 1759 1760=head2 $Archive::Tar::CHOWN 1761 1762By default, C<Archive::Tar> will try to C<chown> your files if it is 1763able to. In some cases, this may not be desired. In that case, set 1764this variable to C<0> to disable C<chown>-ing, even if it were 1765possible. 1766 1767The default is C<1>. 1768 1769=head2 $Archive::Tar::CHMOD 1770 1771By default, C<Archive::Tar> will try to C<chmod> your files to 1772whatever mode was specified for the particular file in the archive. 1773In some cases, this may not be desired. In that case, set this 1774variable to C<0> to disable C<chmod>-ing. 1775 1776The default is C<1>. 1777 1778=head2 $Archive::Tar::SAME_PERMISSIONS 1779 1780When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether 1781the permissions on files from the archive are used without modification 1782of if they are filtered by removing any setid bits and applying the 1783current umask. 1784 1785The default is C<1> for the root user and C<0> for normal users. 1786 1787=head2 $Archive::Tar::DO_NOT_USE_PREFIX 1788 1789By default, C<Archive::Tar> will try to put paths that are over 1790100 characters in the C<prefix> field of your tar header, as 1791defined per POSIX-standard. However, some (older) tar programs 1792do not implement this spec. To retain compatibility with these older 1793or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX> 1794variable to a true value, and C<Archive::Tar> will use an alternate 1795way of dealing with paths over 100 characters by using the 1796C<GNU Extended Header> feature. 1797 1798Note that clients who do not support the C<GNU Extended Header> 1799feature will not be able to read these archives. Such clients include 1800tars on C<Solaris>, C<Irix> and C<AIX>. 1801 1802The default is C<0>. 1803 1804=head2 $Archive::Tar::DEBUG 1805 1806Set this variable to C<1> to always get the C<Carp::longmess> output 1807of the warnings, instead of the regular C<carp>. This is the same 1808message you would get by doing: 1809 1810 $tar->error(1); 1811 1812Defaults to C<0>. 1813 1814=head2 $Archive::Tar::WARN 1815 1816Set this variable to C<0> if you do not want any warnings printed. 1817Personally I recommend against doing this, but people asked for the 1818option. Also, be advised that this is of course not threadsafe. 1819 1820Defaults to C<1>. 1821 1822=head2 $Archive::Tar::error 1823 1824Holds the last reported error. Kept for historical reasons, but its 1825use is very much discouraged. Use the C<error()> method instead: 1826 1827 warn $tar->error unless $tar->extract; 1828 1829Note that in older versions of this module, the C<error()> method 1830would return an effectively global value even when called an instance 1831method as above. This has since been fixed, and multiple instances of 1832C<Archive::Tar> now have separate error strings. 1833 1834=head2 $Archive::Tar::INSECURE_EXTRACT_MODE 1835 1836This variable indicates whether C<Archive::Tar> should allow 1837files to be extracted outside their current working directory. 1838 1839Allowing this could have security implications, as a malicious 1840tar archive could alter or replace any file the extracting user 1841has permissions to. Therefor, the default is to not allow 1842insecure extractions. 1843 1844If you trust the archive, or have other reasons to allow the 1845archive to write files outside your current working directory, 1846set this variable to C<true>. 1847 1848Note that this is a backwards incompatible change from version 1849C<1.36> and before. 1850 1851=head2 $Archive::Tar::HAS_PERLIO 1852 1853This variable holds a boolean indicating if we currently have 1854C<perlio> support loaded. This will be enabled for any perl 1855greater than C<5.8> compiled with C<perlio>. 1856 1857If you feel strongly about disabling it, set this variable to 1858C<false>. Note that you will then need C<IO::String> installed 1859to support writing stringified archives. 1860 1861Don't change this variable unless you B<really> know what you're 1862doing. 1863 1864=head2 $Archive::Tar::HAS_IO_STRING 1865 1866This variable holds a boolean indicating if we currently have 1867C<IO::String> support loaded. This will be enabled for any perl 1868that has a loadable C<IO::String> module. 1869 1870If you feel strongly about disabling it, set this variable to 1871C<false>. Note that you will then need C<perlio> support from 1872your perl to be able to write stringified archives. 1873 1874Don't change this variable unless you B<really> know what you're 1875doing. 1876 1877=head1 FAQ 1878 1879=over 4 1880 1881=item What's the minimum perl version required to run Archive::Tar? 1882 1883You will need perl version 5.005_03 or newer. 1884 1885=item Isn't Archive::Tar slow? 1886 1887Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar> 1888However, it's very portable. If speed is an issue, consider using 1889C</bin/tar> instead. 1890 1891=item Isn't Archive::Tar heavier on memory than /bin/tar? 1892 1893Yes it is, see previous answer. Since C<Compress::Zlib> and therefore 1894C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little 1895choice but to read the archive into memory. 1896This is ok if you want to do in-memory manipulation of the archive. 1897 1898If you just want to extract, use the C<extract_archive> class method 1899instead. It will optimize and write to disk immediately. 1900 1901Another option is to use the C<iter> class method to iterate over 1902the files in the tarball without reading them all in memory at once. 1903 1904=item Can you lazy-load data instead? 1905 1906In some cases, yes. You can use the C<iter> class method to iterate 1907over the files in the tarball without reading them all in memory at once. 1908 1909=item How much memory will an X kb tar file need? 1910 1911Probably more than X kb, since it will all be read into memory. If 1912this is a problem, and you don't need to do in memory manipulation 1913of the archive, consider using the C<iter> class method, or C</bin/tar> 1914instead. 1915 1916=item What do you do with unsupported filetypes in an archive? 1917 1918C<Unix> has a few filetypes that aren't supported on other platforms, 1919like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just 1920try to make a copy of the original file, rather than throwing an error. 1921 1922This does require you to read the entire archive in to memory first, 1923since otherwise we wouldn't know what data to fill the copy with. 1924(This means that you cannot use the class methods, including C<iter> 1925on archives that have incompatible filetypes and still expect things 1926to work). 1927 1928For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that 1929the extraction of this particular item didn't work. 1930 1931=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly! 1932 1933By default, C<Archive::Tar> is in a completely POSIX-compatible 1934mode, which uses the POSIX-specification of C<tar> to store files. 1935For paths greather than 100 characters, this is done using the 1936C<POSIX header prefix>. Non-POSIX-compatible clients may not support 1937this part of the specification, and may only support the C<GNU Extended 1938Header> functionality. To facilitate those clients, you can set the 1939C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the 1940C<GLOBAL VARIABLES> section for details on this variable. 1941 1942Note that GNU tar earlier than version 1.14 does not cope well with 1943the C<POSIX header prefix>. If you use such a version, consider setting 1944the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. 1945 1946=item How do I extract only files that have property X from an archive? 1947 1948Sometimes, you might not wish to extract a complete archive, just 1949the files that are relevant to you, based on some criteria. 1950 1951You can do this by filtering a list of C<Archive::Tar::File> objects 1952based on your criteria. For example, to extract only files that have 1953the string C<foo> in their title, you would use: 1954 1955 $tar->extract( 1956 grep { $_->full_path =~ /foo/ } $tar->get_files 1957 ); 1958 1959This way, you can filter on any attribute of the files in the archive. 1960Consult the C<Archive::Tar::File> documentation on how to use these 1961objects. 1962 1963=item How do I access .tar.Z files? 1964 1965The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via 1966the C<IO::Zlib> module) to access tar files that have been compressed 1967with C<gzip>. Unfortunately tar files compressed with the Unix C<compress> 1968utility cannot be read by C<Compress::Zlib> and so cannot be directly 1969accesses by C<Archive::Tar>. 1970 1971If the C<uncompress> or C<gunzip> programs are available, you can use 1972one of these workarounds to read C<.tar.Z> files from C<Archive::Tar> 1973 1974Firstly with C<uncompress> 1975 1976 use Archive::Tar; 1977 1978 open F, "uncompress -c $filename |"; 1979 my $tar = Archive::Tar->new(*F); 1980 ... 1981 1982and this with C<gunzip> 1983 1984 use Archive::Tar; 1985 1986 open F, "gunzip -c $filename |"; 1987 my $tar = Archive::Tar->new(*F); 1988 ... 1989 1990Similarly, if the C<compress> program is available, you can use this to 1991write a C<.tar.Z> file 1992 1993 use Archive::Tar; 1994 use IO::File; 1995 1996 my $fh = new IO::File "| compress -c >$filename"; 1997 my $tar = Archive::Tar->new(); 1998 ... 1999 $tar->write($fh); 2000 $fh->close ; 2001 2002=item How do I handle Unicode strings? 2003 2004C<Archive::Tar> uses byte semantics for any files it reads from or writes 2005to disk. This is not a problem if you only deal with files and never 2006look at their content or work solely with byte strings. But if you use 2007Unicode strings with character semantics, some additional steps need 2008to be taken. 2009 2010For example, if you add a Unicode string like 2011 2012 # Problem 2013 $tar->add_data('file.txt', "Euro: \x{20AC}"); 2014 2015then there will be a problem later when the tarfile gets written out 2016to disk via C<$tar->write()>: 2017 2018 Wide character in print at .../Archive/Tar.pm line 1014. 2019 2020The data was added as a Unicode string and when writing it out to disk, 2021the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl 2022tried to convert the string to ISO-8859 and failed. The written file 2023now contains garbage. 2024 2025For this reason, Unicode strings need to be converted to UTF-8-encoded 2026bytestrings before they are handed off to C<add_data()>: 2027 2028 use Encode; 2029 my $data = "Accented character: \x{20AC}"; 2030 $data = encode('utf8', $data); 2031 2032 $tar->add_data('file.txt', $data); 2033 2034A opposite problem occurs if you extract a UTF8-encoded file from a 2035tarball. Using C<get_content()> on the C<Archive::Tar::File> object 2036will return its content as a bytestring, not as a Unicode string. 2037 2038If you want it to be a Unicode string (because you want character 2039semantics with operations like regular expression matching), you need 2040to decode the UTF8-encoded content and have Perl convert it into 2041a Unicode string: 2042 2043 use Encode; 2044 my $data = $tar->get_content(); 2045 2046 # Make it a Unicode string 2047 $data = decode('utf8', $data); 2048 2049There is no easy way to provide this functionality in C<Archive::Tar>, 2050because a tarball can contain many files, and each of which could be 2051encoded in a different way. 2052 2053=back 2054 2055=head1 CAVEATS 2056 2057The AIX tar does not fill all unused space in the tar archive with 0x00. 2058This sometimes leads to warning messages from C<Archive::Tar>. 2059 2060 Invalid header block at offset nnn 2061 2062A fix for that problem is scheduled to be released in the following levels 2063of AIX, all of which should be coming out in the 4th quarter of 2009: 2064 2065 AIX 5.3 TL7 SP10 2066 AIX 5.3 TL8 SP8 2067 AIX 5.3 TL9 SP5 2068 AIX 5.3 TL10 SP2 2069 2070 AIX 6.1 TL0 SP11 2071 AIX 6.1 TL1 SP7 2072 AIX 6.1 TL2 SP6 2073 AIX 6.1 TL3 SP3 2074 2075The IBM APAR number for this problem is IZ50240 (Reported component ID: 20765765G0300 / AIX 5.3). It is possible to get an ifix for that problem. 2077If you need an ifix please contact your local IBM AIX support. 2078 2079=head1 TODO 2080 2081=over 4 2082 2083=item Check if passed in handles are open for read/write 2084 2085Currently I don't know of any portable pure perl way to do this. 2086Suggestions welcome. 2087 2088=item Allow archives to be passed in as string 2089 2090Currently, we only allow opened filehandles or filenames, but 2091not strings. The internals would need some reworking to facilitate 2092stringified archives. 2093 2094=item Facilitate processing an opened filehandle of a compressed archive 2095 2096Currently, we only support this if the filehandle is an IO::Zlib object. 2097Environments, like apache, will present you with an opened filehandle 2098to an uploaded file, which might be a compressed archive. 2099 2100=back 2101 2102=head1 SEE ALSO 2103 2104=over 4 2105 2106=item The GNU tar specification 2107 2108C<http://www.gnu.org/software/tar/manual/tar.html> 2109 2110=item The PAX format specication 2111 2112The specifcation which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html> 2113 2114=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html> 2115 2116=item GNU tar intends to switch to POSIX compatibility 2117 2118GNU Tar authors have expressed their intention to become completely 2119POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html> 2120 2121=item A Comparison between various tar implementations 2122 2123Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs> 2124 2125=back 2126 2127=head1 AUTHOR 2128 2129This module by Jos Boumans E<lt>kane@cpan.orgE<gt>. 2130 2131Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>. 2132 2133=head1 ACKNOWLEDGEMENTS 2134 2135Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas, 2136Rainer Tammer and especially Andrew Savige for their help and suggestions. 2137 2138=head1 COPYRIGHT 2139 2140This module is copyright (c) 2002 - 2009 Jos Boumans 2141E<lt>kane@cpan.orgE<gt>. All rights reserved. 2142 2143This library is free software; you may redistribute and/or modify 2144it under the same terms as Perl itself. 2145 2146=cut 2147