xref: /openbsd-src/gnu/usr.bin/perl/cpan/Archive-Tar/lib/Archive/Tar.pm (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1### the gnu tar specification:
2### http://www.gnu.org/software/tar/manual/tar.html
3###
4### and the pax format spec, which tar derives from:
5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
10use Cwd;
11use IO::Zlib;
12use IO::File;
13use Carp                qw(carp croak);
14use File::Spec          ();
15use File::Spec::Unix    ();
16use File::Path          ();
17
18use Archive::Tar::File;
19use Archive::Tar::Constant;
20
21require Exporter;
22
23use strict;
24use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
25            $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS
26            $INSECURE_EXTRACT_MODE @ISA @EXPORT
27         ];
28
29@ISA                    = qw[Exporter];
30@EXPORT                 = qw[ COMPRESS_GZIP COMPRESS_BZIP ];
31$DEBUG                  = 0;
32$WARN                   = 1;
33$FOLLOW_SYMLINK         = 0;
34$VERSION                = "1.54";
35$CHOWN                  = 1;
36$CHMOD                  = 1;
37$SAME_PERMISSIONS       = $> == 0 ? 1 : 0;
38$DO_NOT_USE_PREFIX      = 0;
39$INSECURE_EXTRACT_MODE  = 0;
40
41BEGIN {
42    use Config;
43    $HAS_PERLIO = $Config::Config{useperlio};
44
45    ### try and load IO::String anyway, so you can dynamically
46    ### switch between perlio and IO::String
47    $HAS_IO_STRING = eval {
48        require IO::String;
49        import IO::String;
50        1;
51    } || 0;
52}
53
54=head1 NAME
55
56Archive::Tar - module for manipulations of tar archives
57
58=head1 SYNOPSIS
59
60    use Archive::Tar;
61    my $tar = Archive::Tar->new;
62
63    $tar->read('origin.tgz');
64    $tar->extract();
65
66    $tar->add_files('file/foo.pl', 'docs/README');
67    $tar->add_data('file/baz.txt', 'This is the contents now');
68
69    $tar->rename('oldname', 'new/file/name');
70
71    $tar->write('files.tar');                   # plain tar
72    $tar->write('files.tgz', COMPRESS_GZIP);    # gzip compressed
73    $tar->write('files.tbz', COMPRESS_BZIP);    # bzip2 compressed
74
75=head1 DESCRIPTION
76
77Archive::Tar provides an object oriented mechanism for handling tar
78files.  It provides class methods for quick and easy files handling
79while also allowing for the creation of tar file objects for custom
80manipulation.  If you have the IO::Zlib module installed,
81Archive::Tar will also support compressed or gzipped tar files.
82
83An object of class Archive::Tar represents a .tar(.gz) archive full
84of files and things.
85
86=head1 Object Methods
87
88=head2 Archive::Tar->new( [$file, $compressed] )
89
90Returns a new Tar object. If given any arguments, C<new()> calls the
91C<read()> method automatically, passing on the arguments provided to
92the C<read()> method.
93
94If C<new()> is invoked with arguments and the C<read()> method fails
95for any reason, C<new()> returns undef.
96
97=cut
98
99my $tmpl = {
100    _data   => [ ],
101    _file   => 'Unknown',
102};
103
104### install get/set accessors for this object.
105for my $key ( keys %$tmpl ) {
106    no strict 'refs';
107    *{__PACKAGE__."::$key"} = sub {
108        my $self = shift;
109        $self->{$key} = $_[0] if @_;
110        return $self->{$key};
111    }
112}
113
114sub new {
115    my $class = shift;
116    $class = ref $class if ref $class;
117
118    ### copying $tmpl here since a shallow copy makes it use the
119    ### same aref, causing for files to remain in memory always.
120    my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class;
121
122    if (@_) {
123        unless ( $obj->read( @_ ) ) {
124            $obj->_error(qq[No data could be read from file]);
125            return;
126        }
127    }
128
129    return $obj;
130}
131
132=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] )
133
134Read the given tar file into memory.
135The first argument can either be the name of a file or a reference to
136an already open filehandle (or an IO::Zlib object if it's compressed)
137
138The C<read> will I<replace> any previous content in C<$tar>!
139
140The second argument may be considered optional, but remains for
141backwards compatibility. Archive::Tar now looks at the file
142magic to determine what class should be used to open the file
143and will transparently Do The Right Thing.
144
145Archive::Tar will warn if you try to pass a bzip2 compressed file and the
146IO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return.
147
148Note that you can currently B<not> pass a C<gzip> compressed
149filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed
150filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string
151containing the full archive information (either compressed or
152uncompressed). These are worth while features, but not currently
153implemented. See the C<TODO> section.
154
155The third argument can be a hash reference with options. Note that
156all options are case-sensitive.
157
158=over 4
159
160=item limit
161
162Do not read more than C<limit> files. This is useful if you have
163very big archives, and are only interested in the first few files.
164
165=item filter
166
167Can be set to a regular expression.  Only files with names that match
168the expression will be read.
169
170=item extract
171
172If set to true, immediately extract entries when reading them. This
173gives you the same memory break as the C<extract_archive> function.
174Note however that entries will not be read into memory, but written
175straight to disk. This means no C<Archive::Tar::File> objects are
176created for you to inspect.
177
178=back
179
180All files are stored internally as C<Archive::Tar::File> objects.
181Please consult the L<Archive::Tar::File> documentation for details.
182
183Returns the number of files read in scalar context, and a list of
184C<Archive::Tar::File> objects in list context.
185
186=cut
187
188sub read {
189    my $self = shift;
190    my $file = shift;
191    my $gzip = shift || 0;
192    my $opts = shift || {};
193
194    unless( defined $file ) {
195        $self->_error( qq[No file to read from!] );
196        return;
197    } else {
198        $self->_file( $file );
199    }
200
201    my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
202                    or return;
203
204    my $data = $self->_read_tar( $handle, $opts ) or return;
205
206    $self->_data( $data );
207
208    return wantarray ? @$data : scalar @$data;
209}
210
211sub _get_handle {
212    my $self     = shift;
213    my $file     = shift;   return unless defined $file;
214                            return $file if ref $file;
215    my $compress = shift || 0;
216    my $mode     = shift || READ_ONLY->( ZLIB ); # default to read only
217
218
219    ### get a FH opened to the right class, so we can use it transparently
220    ### throughout the program
221    my $fh;
222    {   ### reading magic only makes sense if we're opening a file for
223        ### reading. otherwise, just use what the user requested.
224        my $magic = '';
225        if( MODE_READ->($mode) ) {
226            open my $tmp, $file or do {
227                $self->_error( qq[Could not open '$file' for reading: $!] );
228                return;
229            };
230
231            ### read the first 4 bites of the file to figure out which class to
232            ### use to open the file.
233            sysread( $tmp, $magic, 4 );
234            close $tmp;
235        }
236
237        ### is it bzip?
238        ### if you asked specifically for bzip compression, or if we're in
239        ### read mode and the magic numbers add up, use bzip
240        if( BZIP and (
241                ($compress eq COMPRESS_BZIP) or
242                ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM )
243            )
244        ) {
245
246            ### different reader/writer modules, different error vars... sigh
247            if( MODE_READ->($mode) ) {
248                $fh = IO::Uncompress::Bunzip2->new( $file ) or do {
249                    $self->_error( qq[Could not read '$file': ] .
250                        $IO::Uncompress::Bunzip2::Bunzip2Error
251                    );
252                    return;
253                };
254
255            } else {
256                $fh = IO::Compress::Bzip2->new( $file ) or do {
257                    $self->_error( qq[Could not write to '$file': ] .
258                        $IO::Compress::Bzip2::Bzip2Error
259                    );
260                    return;
261                };
262            }
263
264        ### is it gzip?
265        ### if you asked for compression, if you wanted to read or the gzip
266        ### magic number is present (redundant with read)
267        } elsif( ZLIB and (
268                    $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM
269                 )
270        ) {
271            $fh = IO::Zlib->new;
272
273            unless( $fh->open( $file, $mode ) ) {
274                $self->_error(qq[Could not create filehandle for '$file': $!]);
275                return;
276            }
277
278        ### is it plain tar?
279        } else {
280            $fh = IO::File->new;
281
282            unless( $fh->open( $file, $mode ) ) {
283                $self->_error(qq[Could not create filehandle for '$file': $!]);
284                return;
285            }
286
287            ### enable bin mode on tar archives
288            binmode $fh;
289        }
290    }
291
292    return $fh;
293}
294
295
296sub _read_tar {
297    my $self    = shift;
298    my $handle  = shift or return;
299    my $opts    = shift || {};
300
301    my $count   = $opts->{limit}    || 0;
302    my $filter  = $opts->{filter};
303    my $extract = $opts->{extract}  || 0;
304
305    ### set a cap on the amount of files to extract ###
306    my $limit   = 0;
307    $limit = 1 if $count > 0;
308
309    my $tarfile = [ ];
310    my $chunk;
311    my $read = 0;
312    my $real_name;  # to set the name of a file when
313                    # we're encountering @longlink
314    my $data;
315
316    LOOP:
317    while( $handle->read( $chunk, HEAD ) ) {
318        ### IO::Zlib doesn't support this yet
319        my $offset = eval { tell $handle } || 'unknown';
320
321        unless( $read++ ) {
322            my $gzip = GZIP_MAGIC_NUM;
323            if( $chunk =~ /$gzip/ ) {
324                $self->_error( qq[Cannot read compressed format in tar-mode] );
325                return;
326            }
327
328            ### size is < HEAD, which means a corrupted file, as the minimum
329            ### length is _at least_ HEAD
330            if (length $chunk != HEAD) {
331                $self->_error( qq[Cannot read enough bytes from the tarfile] );
332                return;
333            }
334        }
335
336        ### if we can't read in all bytes... ###
337        last if length $chunk != HEAD;
338
339        ### Apparently this should really be two blocks of 512 zeroes,
340        ### but GNU tar sometimes gets it wrong. See comment in the
341        ### source code (tar.c) to GNU cpio.
342        next if $chunk eq TAR_END;
343
344        ### according to the posix spec, the last 12 bytes of the header are
345        ### null bytes, to pad it to a 512 byte block. That means if these
346        ### bytes are NOT null bytes, it's a corrrupt header. See:
347        ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
348        ### line 111
349        {   my $nulls = join '', "\0" x 12;
350            unless( $nulls eq substr( $chunk, 500, 12 ) ) {
351                $self->_error( qq[Invalid header block at offset $offset] );
352                next LOOP;
353            }
354        }
355
356        ### pass the realname, so we can set it 'proper' right away
357        ### some of the heuristics are done on the name, so important
358        ### to set it ASAP
359        my $entry;
360        {   my %extra_args = ();
361            $extra_args{'name'} = $$real_name if defined $real_name;
362
363            unless( $entry = Archive::Tar::File->new(   chunk => $chunk,
364                                                        %extra_args )
365            ) {
366                $self->_error( qq[Couldn't read chunk at offset $offset] );
367                next LOOP;
368            }
369        }
370
371        ### ignore labels:
372        ### http://www.gnu.org/manual/tar/html_node/tar_139.html
373        next if $entry->is_label;
374
375        if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
376
377            if ( $entry->is_file && !$entry->validate ) {
378                ### sometimes the chunk is rather fux0r3d and a whole 512
379                ### bytes ends up in the ->name area.
380                ### clean it up, if need be
381                my $name = $entry->name;
382                $name = substr($name, 0, 100) if length $name > 100;
383                $name =~ s/\n/ /g;
384
385                $self->_error( $name . qq[: checksum error] );
386                next LOOP;
387            }
388
389            my $block = BLOCK_SIZE->( $entry->size );
390
391            $data = $entry->get_content_by_ref;
392
393            ### just read everything into memory
394            ### can't do lazy loading since IO::Zlib doesn't support 'seek'
395            ### this is because Compress::Zlib doesn't support it =/
396            ### this reads in the whole data in one read() call.
397            if( $handle->read( $$data, $block ) < $block ) {
398                $self->_error( qq[Read error on tarfile (missing data) '].
399                                    $entry->full_path ."' at offset $offset" );
400                next LOOP;
401            }
402
403            ### throw away trailing garbage ###
404            substr ($$data, $entry->size) = "" if defined $$data;
405
406            ### part II of the @LongLink munging -- need to do /after/
407            ### the checksum check.
408            if( $entry->is_longlink ) {
409                ### weird thing in tarfiles -- if the file is actually a
410                ### @LongLink, the data part seems to have a trailing ^@
411                ### (unprintable) char. to display, pipe output through less.
412                ### but that doesn't *always* happen.. so check if the last
413                ### character is a control character, and if so remove it
414                ### at any rate, we better remove that character here, or tests
415                ### like 'eq' and hashlook ups based on names will SO not work
416                ### remove it by calculating the proper size, and then
417                ### tossing out everything that's longer than that size.
418
419                ### count number of nulls
420                my $nulls = $$data =~ tr/\0/\0/;
421
422                ### cut data + size by that many bytes
423                $entry->size( $entry->size - $nulls );
424                substr ($$data, $entry->size) = "";
425            }
426        }
427
428        ### clean up of the entries.. posix tar /apparently/ has some
429        ### weird 'feature' that allows for filenames > 255 characters
430        ### they'll put a header in with as name '././@LongLink' and the
431        ### contents will be the name of the /next/ file in the archive
432        ### pretty crappy and kludgy if you ask me
433
434        ### set the name for the next entry if this is a @LongLink;
435        ### this is one ugly hack =/ but needed for direct extraction
436        if( $entry->is_longlink ) {
437            $real_name = $data;
438            next LOOP;
439        } elsif ( defined $real_name ) {
440            $entry->name( $$real_name );
441            $entry->prefix('');
442            undef $real_name;
443        }
444
445        ### skip this entry if we're filtering
446        if ($filter && $entry->name !~ $filter) {
447            next LOOP;
448
449        ### skip this entry if it's a pax header. This is a special file added
450        ### by, among others, git-generated tarballs. It holds comments and is
451        ### not meant for extracting. See #38932: pax_global_header extracted
452        } elsif ( $entry->name eq PAX_HEADER ) {
453            next LOOP;
454        }
455
456        $self->_extract_file( $entry ) if $extract
457                                            && !$entry->is_longlink
458                                            && !$entry->is_unknown
459                                            && !$entry->is_label;
460
461        ### Guard against tarfiles with garbage at the end
462	    last LOOP if $entry->name eq '';
463
464        ### push only the name on the rv if we're extracting
465        ### -- for extract_archive
466        push @$tarfile, ($extract ? $entry->name : $entry);
467
468        if( $limit ) {
469            $count-- unless $entry->is_longlink || $entry->is_dir;
470            last LOOP unless $count;
471        }
472    } continue {
473        undef $data;
474    }
475
476    return $tarfile;
477}
478
479=head2 $tar->contains_file( $filename )
480
481Check if the archive contains a certain file.
482It will return true if the file is in the archive, false otherwise.
483
484Note however, that this function does an exact match using C<eq>
485on the full path. So it cannot compensate for case-insensitive file-
486systems or compare 2 paths to see if they would point to the same
487underlying file.
488
489=cut
490
491sub contains_file {
492    my $self = shift;
493    my $full = shift;
494
495    return unless defined $full;
496
497    ### don't warn if the entry isn't there.. that's what this function
498    ### is for after all.
499    local $WARN = 0;
500    return 1 if $self->_find_entry($full);
501    return;
502}
503
504=head2 $tar->extract( [@filenames] )
505
506Write files whose names are equivalent to any of the names in
507C<@filenames> to disk, creating subdirectories as necessary. This
508might not work too well under VMS.
509Under MacPerl, the file's modification time will be converted to the
510MacOS zero of time, and appropriate conversions will be done to the
511path.  However, the length of each element of the path is not
512inspected to see whether it's longer than MacOS currently allows (32
513characters).
514
515If C<extract> is called without a list of file names, the entire
516contents of the archive are extracted.
517
518Returns a list of filenames extracted.
519
520=cut
521
522sub extract {
523    my $self    = shift;
524    my @args    = @_;
525    my @files;
526
527    # use the speed optimization for all extracted files
528    local($self->{cwd}) = cwd() unless $self->{cwd};
529
530    ### you requested the extraction of only certian files
531    if( @args ) {
532        for my $file ( @args ) {
533
534            ### it's already an object?
535            if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) {
536                push @files, $file;
537                next;
538
539            ### go find it then
540            } else {
541
542                my $found;
543                for my $entry ( @{$self->_data} ) {
544                    next unless $file eq $entry->full_path;
545
546                    ### we found the file you're looking for
547                    push @files, $entry;
548                    $found++;
549                }
550
551                unless( $found ) {
552                    return $self->_error(
553                        qq[Could not find '$file' in archive] );
554                }
555            }
556        }
557
558    ### just grab all the file items
559    } else {
560        @files = $self->get_files;
561    }
562
563    ### nothing found? that's an error
564    unless( scalar @files ) {
565        $self->_error( qq[No files found for ] . $self->_file );
566        return;
567    }
568
569    ### now extract them
570    for my $entry ( @files ) {
571        unless( $self->_extract_file( $entry ) ) {
572            $self->_error(q[Could not extract ']. $entry->full_path .q['] );
573            return;
574        }
575    }
576
577    return @files;
578}
579
580=head2 $tar->extract_file( $file, [$extract_path] )
581
582Write an entry, whose name is equivalent to the file name provided to
583disk. Optionally takes a second parameter, which is the full native
584path (including filename) the entry will be written to.
585
586For example:
587
588    $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
589
590    $tar->extract_file( $at_file_object,   'name/i/want/to/give/it' );
591
592Returns true on success, false on failure.
593
594=cut
595
596sub extract_file {
597    my $self = shift;
598    my $file = shift;   return unless defined $file;
599    my $alt  = shift;
600
601    my $entry = $self->_find_entry( $file )
602        or $self->_error( qq[Could not find an entry for '$file'] ), return;
603
604    return $self->_extract_file( $entry, $alt );
605}
606
607sub _extract_file {
608    my $self    = shift;
609    my $entry   = shift or return;
610    my $alt     = shift;
611
612    ### you wanted an alternate extraction location ###
613    my $name = defined $alt ? $alt : $entry->full_path;
614
615                            ### splitpath takes a bool at the end to indicate
616                            ### that it's splitting a dir
617    my ($vol,$dirs,$file);
618    if ( defined $alt ) { # It's a local-OS path
619        ($vol,$dirs,$file) = File::Spec->splitpath(       $alt,
620                                                          $entry->is_dir );
621    } else {
622        ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
623                                                          $entry->is_dir );
624    }
625
626    my $dir;
627    ### is $name an absolute path? ###
628    if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) {
629
630        ### absolute names are not allowed to be in tarballs under
631        ### strict mode, so only allow it if a user tells us to do it
632        if( not defined $alt and not $INSECURE_EXTRACT_MODE ) {
633            $self->_error(
634                q[Entry ']. $entry->full_path .q[' is an absolute path. ].
635                q[Not extracting absolute paths under SECURE EXTRACT MODE]
636            );
637            return;
638        }
639
640        ### user asked us to, it's fine.
641        $dir = File::Spec->catpath( $vol, $dirs, "" );
642
643    ### it's a relative path ###
644    } else {
645        my $cwd     = (ref $self and defined $self->{cwd})
646                        ? $self->{cwd}
647                        : cwd();
648
649        my @dirs = defined $alt
650            ? File::Spec->splitdir( $dirs )         # It's a local-OS path
651            : File::Spec::Unix->splitdir( $dirs );  # it's UNIX-style, likely
652                                                    # straight from the tarball
653
654        if( not defined $alt            and
655            not $INSECURE_EXTRACT_MODE
656        ) {
657
658            ### paths that leave the current directory are not allowed under
659            ### strict mode, so only allow it if a user tells us to do this.
660            if( grep { $_ eq '..' } @dirs ) {
661
662                $self->_error(
663                    q[Entry ']. $entry->full_path .q[' is attempting to leave ].
664                    q[the current working directory. Not extracting under ].
665                    q[SECURE EXTRACT MODE]
666                );
667                return;
668            }
669
670            ### the archive may be asking us to extract into a symlink. This
671            ### is not sane and a possible security issue, as outlined here:
672            ### https://rt.cpan.org/Ticket/Display.html?id=30380
673            ### https://bugzilla.redhat.com/show_bug.cgi?id=295021
674            ### https://issues.rpath.com/browse/RPL-1716
675            my $full_path = $cwd;
676            for my $d ( @dirs ) {
677                $full_path = File::Spec->catdir( $full_path, $d );
678
679                ### we've already checked this one, and it's safe. Move on.
680                next if ref $self and $self->{_link_cache}->{$full_path};
681
682                if( -l $full_path ) {
683                    my $to   = readlink $full_path;
684                    my $diag = "symlinked directory ($full_path => $to)";
685
686                    $self->_error(
687                        q[Entry ']. $entry->full_path .q[' is attempting to ].
688                        qq[extract to a $diag. This is considered a security ].
689                        q[vulnerability and not allowed under SECURE EXTRACT ].
690                        q[MODE]
691                    );
692                    return;
693                }
694
695                ### XXX keep a cache if possible, so the stats become cheaper:
696                $self->{_link_cache}->{$full_path} = 1 if ref $self;
697            }
698        }
699
700        ### '.' is the directory delimiter on VMS, which has to be escaped
701        ### or changed to '_' on vms.  vmsify is used, because older versions
702        ### of vmspath do not handle this properly.
703        ### Must not add a '/' to an empty directory though.
704        map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS;
705
706        my ($cwd_vol,$cwd_dir,$cwd_file)
707                    = File::Spec->splitpath( $cwd );
708        my @cwd     = File::Spec->splitdir( $cwd_dir );
709        push @cwd, $cwd_file if length $cwd_file;
710
711        ### We need to pass '' as the last elemant to catpath. Craig Berry
712        ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
713        ### The root problem is that splitpath on UNIX always returns the
714        ### final path element as a file even if it is a directory, and of
715        ### course there is no way it can know the difference without checking
716        ### against the filesystem, which it is documented as not doing.  When
717        ### you turn around and call catpath, on VMS you have to know which bits
718        ### are directory bits and which bits are file bits.  In this case we
719        ### know the result should be a directory.  I had thought you could omit
720        ### the file argument to catpath in such a case, but apparently on UNIX
721        ### you can't.
722        $dir        = File::Spec->catpath(
723                            $cwd_vol, File::Spec->catdir( @cwd, @dirs ), ''
724                        );
725
726        ### catdir() returns undef if the path is longer than 255 chars on
727        ### older VMS systems.
728        unless ( defined $dir ) {
729            $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
730            return;
731        }
732
733    }
734
735    if( -e $dir && !-d _ ) {
736        $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
737        return;
738    }
739
740    unless ( -d _ ) {
741        eval { File::Path::mkpath( $dir, 0, 0777 ) };
742        if( $@ ) {
743            my $fp = $entry->full_path;
744            $self->_error(qq[Could not create directory '$dir' for '$fp': $@]);
745            return;
746        }
747
748        ### XXX chown here? that might not be the same as in the archive
749        ### as we're only chown'ing to the owner of the file we're extracting
750        ### not to the owner of the directory itself, which may or may not
751        ### be another entry in the archive
752        ### Answer: no, gnu tar doesn't do it either, it'd be the wrong
753        ### way to go.
754        #if( $CHOWN && CAN_CHOWN ) {
755        #    chown $entry->uid, $entry->gid, $dir or
756        #        $self->_error( qq[Could not set uid/gid on '$dir'] );
757        #}
758    }
759
760    ### we're done if we just needed to create a dir ###
761    return 1 if $entry->is_dir;
762
763    my $full = File::Spec->catfile( $dir, $file );
764
765    if( $entry->is_unknown ) {
766        $self->_error( qq[Unknown file type for file '$full'] );
767        return;
768    }
769
770    if( length $entry->type && $entry->is_file ) {
771        my $fh = IO::File->new;
772        $fh->open( '>' . $full ) or (
773            $self->_error( qq[Could not open file '$full': $!] ),
774            return
775        );
776
777        if( $entry->size ) {
778            binmode $fh;
779            syswrite $fh, $entry->data or (
780                $self->_error( qq[Could not write data to '$full'] ),
781                return
782            );
783        }
784
785        close $fh or (
786            $self->_error( qq[Could not close file '$full'] ),
787            return
788        );
789
790    } else {
791        $self->_make_special_file( $entry, $full ) or return;
792    }
793
794    ### only update the timestamp if it's not a symlink; that will change the
795    ### timestamp of the original. This addresses bug #33669: Could not update
796    ### timestamp warning on symlinks
797    if( not -l $full ) {
798        utime time, $entry->mtime - TIME_OFFSET, $full or
799            $self->_error( qq[Could not update timestamp] );
800    }
801
802    if( $CHOWN && CAN_CHOWN->() ) {
803        chown $entry->uid, $entry->gid, $full or
804            $self->_error( qq[Could not set uid/gid on '$full'] );
805    }
806
807    ### only chmod if we're allowed to, but never chmod symlinks, since they'll
808    ### change the perms on the file they're linking too...
809    if( $CHMOD and not -l $full ) {
810        my $mode = $entry->mode;
811        unless ($SAME_PERMISSIONS) {
812            $mode &= ~(oct(7000) | umask);
813        }
814        chmod $mode, $full or
815            $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
816    }
817
818    return 1;
819}
820
821sub _make_special_file {
822    my $self    = shift;
823    my $entry   = shift     or return;
824    my $file    = shift;    return unless defined $file;
825
826    my $err;
827
828    if( $entry->is_symlink ) {
829        my $fail;
830        if( ON_UNIX ) {
831            symlink( $entry->linkname, $file ) or $fail++;
832
833        } else {
834            $self->_extract_special_file_as_plain_file( $entry, $file )
835                or $fail++;
836        }
837
838        $err =  qq[Making symbolic link '$file' to '] .
839                $entry->linkname .q[' failed] if $fail;
840
841    } elsif ( $entry->is_hardlink ) {
842        my $fail;
843        if( ON_UNIX ) {
844            link( $entry->linkname, $file ) or $fail++;
845
846        } else {
847            $self->_extract_special_file_as_plain_file( $entry, $file )
848                or $fail++;
849        }
850
851        $err =  qq[Making hard link from '] . $entry->linkname .
852                qq[' to '$file' failed] if $fail;
853
854    } elsif ( $entry->is_fifo ) {
855        ON_UNIX && !system('mknod', $file, 'p') or
856            $err = qq[Making fifo ']. $entry->name .qq[' failed];
857
858    } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
859        my $mode = $entry->is_blockdev ? 'b' : 'c';
860
861        ON_UNIX && !system('mknod', $file, $mode,
862                            $entry->devmajor, $entry->devminor) or
863            $err =  qq[Making block device ']. $entry->name .qq[' (maj=] .
864                    $entry->devmajor . qq[ min=] . $entry->devminor .
865                    qq[) failed.];
866
867    } elsif ( $entry->is_socket ) {
868        ### the original doesn't do anything special for sockets.... ###
869        1;
870    }
871
872    return $err ? $self->_error( $err ) : 1;
873}
874
875### don't know how to make symlinks, let's just extract the file as
876### a plain file
877sub _extract_special_file_as_plain_file {
878    my $self    = shift;
879    my $entry   = shift     or return;
880    my $file    = shift;    return unless defined $file;
881
882    my $err;
883    TRY: {
884        my $orig = $self->_find_entry( $entry->linkname );
885
886        unless( $orig ) {
887            $err =  qq[Could not find file '] . $entry->linkname .
888                    qq[' in memory.];
889            last TRY;
890        }
891
892        ### clone the entry, make it appear as a normal file ###
893        my $clone = $entry->clone;
894        $clone->_downgrade_to_plainfile;
895        $self->_extract_file( $clone, $file ) or last TRY;
896
897        return 1;
898    }
899
900    return $self->_error($err);
901}
902
903=head2 $tar->list_files( [\@properties] )
904
905Returns a list of the names of all the files in the archive.
906
907If C<list_files()> is passed an array reference as its first argument
908it returns a list of hash references containing the requested
909properties of each file.  The following list of properties is
910supported: name, size, mtime (last modified date), mode, uid, gid,
911linkname, uname, gname, devmajor, devminor, prefix.
912
913Passing an array reference containing only one element, 'name', is
914special cased to return a list of names rather than a list of hash
915references, making it equivalent to calling C<list_files> without
916arguments.
917
918=cut
919
920sub list_files {
921    my $self = shift;
922    my $aref = shift || [ ];
923
924    unless( $self->_data ) {
925        $self->read() or return;
926    }
927
928    if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
929        return map { $_->full_path } @{$self->_data};
930    } else {
931
932        #my @rv;
933        #for my $obj ( @{$self->_data} ) {
934        #    push @rv, { map { $_ => $obj->$_() } @$aref };
935        #}
936        #return @rv;
937
938        ### this does the same as the above.. just needs a +{ }
939        ### to make sure perl doesn't confuse it for a block
940        return map {    my $o=$_;
941                        +{ map { $_ => $o->$_() } @$aref }
942                    } @{$self->_data};
943    }
944}
945
946sub _find_entry {
947    my $self = shift;
948    my $file = shift;
949
950    unless( defined $file ) {
951        $self->_error( qq[No file specified] );
952        return;
953    }
954
955    ### it's an object already
956    return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
957
958    for my $entry ( @{$self->_data} ) {
959        my $path = $entry->full_path;
960        return $entry if $path eq $file;
961    }
962
963    $self->_error( qq[No such file in archive: '$file'] );
964    return;
965}
966
967=head2 $tar->get_files( [@filenames] )
968
969Returns the C<Archive::Tar::File> objects matching the filenames
970provided. If no filename list was passed, all C<Archive::Tar::File>
971objects in the current Tar object are returned.
972
973Please refer to the C<Archive::Tar::File> documentation on how to
974handle these objects.
975
976=cut
977
978sub get_files {
979    my $self = shift;
980
981    return @{ $self->_data } unless @_;
982
983    my @list;
984    for my $file ( @_ ) {
985        push @list, grep { defined } $self->_find_entry( $file );
986    }
987
988    return @list;
989}
990
991=head2 $tar->get_content( $file )
992
993Return the content of the named file.
994
995=cut
996
997sub get_content {
998    my $self = shift;
999    my $entry = $self->_find_entry( shift ) or return;
1000
1001    return $entry->data;
1002}
1003
1004=head2 $tar->replace_content( $file, $content )
1005
1006Make the string $content be the content for the file named $file.
1007
1008=cut
1009
1010sub replace_content {
1011    my $self = shift;
1012    my $entry = $self->_find_entry( shift ) or return;
1013
1014    return $entry->replace_content( shift );
1015}
1016
1017=head2 $tar->rename( $file, $new_name )
1018
1019Rename the file of the in-memory archive to $new_name.
1020
1021Note that you must specify a Unix path for $new_name, since per tar
1022standard, all files in the archive must be Unix paths.
1023
1024Returns true on success and false on failure.
1025
1026=cut
1027
1028sub rename {
1029    my $self = shift;
1030    my $file = shift; return unless defined $file;
1031    my $new  = shift; return unless defined $new;
1032
1033    my $entry = $self->_find_entry( $file ) or return;
1034
1035    return $entry->rename( $new );
1036}
1037
1038=head2 $tar->remove (@filenamelist)
1039
1040Removes any entries with names matching any of the given filenames
1041from the in-memory archive. Returns a list of C<Archive::Tar::File>
1042objects that remain.
1043
1044=cut
1045
1046sub remove {
1047    my $self = shift;
1048    my @list = @_;
1049
1050    my %seen = map { $_->full_path => $_ } @{$self->_data};
1051    delete $seen{ $_ } for @list;
1052
1053    $self->_data( [values %seen] );
1054
1055    return values %seen;
1056}
1057
1058=head2 $tar->clear
1059
1060C<clear> clears the current in-memory archive. This effectively gives
1061you a 'blank' object, ready to be filled again. Note that C<clear>
1062only has effect on the object, not the underlying tarfile.
1063
1064=cut
1065
1066sub clear {
1067    my $self = shift or return;
1068
1069    $self->_data( [] );
1070    $self->_file( '' );
1071
1072    return 1;
1073}
1074
1075
1076=head2 $tar->write ( [$file, $compressed, $prefix] )
1077
1078Write the in-memory archive to disk.  The first argument can either
1079be the name of a file or a reference to an already open filehandle (a
1080GLOB reference).
1081
1082The second argument is used to indicate compression. You can either
1083compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
1084to be the C<gzip> compression level (between 1 and 9), but the use of
1085constants is prefered:
1086
1087  # write a gzip compressed file
1088  $tar->write( 'out.tgz', COMPRESS_GZIP );
1089
1090  # write a bzip compressed file
1091  $tar->write( 'out.tbz', COMPRESS_BZIP );
1092
1093Note that when you pass in a filehandle, the compression argument
1094is ignored, as all files are printed verbatim to your filehandle.
1095If you wish to enable compression with filehandles, use an
1096C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
1097
1098The third argument is an optional prefix. All files will be tucked
1099away in the directory you specify as prefix. So if you have files
1100'a' and 'b' in your archive, and you specify 'foo' as prefix, they
1101will be written to the archive as 'foo/a' and 'foo/b'.
1102
1103If no arguments are given, C<write> returns the entire formatted
1104archive as a string, which could be useful if you'd like to stuff the
1105archive into a socket or a pipe to gzip or something.
1106
1107
1108=cut
1109
1110sub write {
1111    my $self        = shift;
1112    my $file        = shift; $file = '' unless defined $file;
1113    my $gzip        = shift || 0;
1114    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
1115    my $dummy       = '';
1116
1117    ### only need a handle if we have a file to print to ###
1118    my $handle = length($file)
1119                    ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
1120                        or return )
1121                    : $HAS_PERLIO    ? do { open my $h, '>', \$dummy; $h }
1122                    : $HAS_IO_STRING ? IO::String->new
1123                    : __PACKAGE__->no_string_support();
1124
1125    ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a
1126    ### corrupt TAR file. Must clear out $\ to make sure no garbage is
1127    ### printed to the archive
1128    local $\;
1129
1130    for my $entry ( @{$self->_data} ) {
1131        ### entries to be written to the tarfile ###
1132        my @write_me;
1133
1134        ### only now will we change the object to reflect the current state
1135        ### of the name and prefix fields -- this needs to be limited to
1136        ### write() only!
1137        my $clone = $entry->clone;
1138
1139
1140        ### so, if you don't want use to use the prefix, we'll stuff
1141        ### everything in the name field instead
1142        if( $DO_NOT_USE_PREFIX ) {
1143
1144            ### you might have an extended prefix, if so, set it in the clone
1145            ### XXX is ::Unix right?
1146            $clone->name( length $ext_prefix
1147                            ? File::Spec::Unix->catdir( $ext_prefix,
1148                                                        $clone->full_path)
1149                            : $clone->full_path );
1150            $clone->prefix( '' );
1151
1152        ### otherwise, we'll have to set it properly -- prefix part in the
1153        ### prefix and name part in the name field.
1154        } else {
1155
1156            ### split them here, not before!
1157            my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
1158
1159            ### you might have an extended prefix, if so, set it in the clone
1160            ### XXX is ::Unix right?
1161            $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
1162                if length $ext_prefix;
1163
1164            $clone->prefix( $prefix );
1165            $clone->name( $name );
1166        }
1167
1168        ### names are too long, and will get truncated if we don't add a
1169        ### '@LongLink' file...
1170        my $make_longlink = (   length($clone->name)    > NAME_LENGTH or
1171                                length($clone->prefix)  > PREFIX_LENGTH
1172                            ) || 0;
1173
1174        ### perhaps we need to make a longlink file?
1175        if( $make_longlink ) {
1176            my $longlink = Archive::Tar::File->new(
1177                            data => LONGLINK_NAME,
1178                            $clone->full_path,
1179                            { type => LONGLINK }
1180                        );
1181
1182            unless( $longlink ) {
1183                $self->_error(  qq[Could not create 'LongLink' entry for ] .
1184                                qq[oversize file '] . $clone->full_path ."'" );
1185                return;
1186            };
1187
1188            push @write_me, $longlink;
1189        }
1190
1191        push @write_me, $clone;
1192
1193        ### write the one, optionally 2 a::t::file objects to the handle
1194        for my $clone (@write_me) {
1195
1196            ### if the file is a symlink, there are 2 options:
1197            ### either we leave the symlink intact, but then we don't write any
1198            ### data OR we follow the symlink, which means we actually make a
1199            ### copy. if we do the latter, we have to change the TYPE of the
1200            ### clone to 'FILE'
1201            my $link_ok =  $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
1202            my $data_ok = !$clone->is_symlink && $clone->has_content;
1203
1204            ### downgrade to a 'normal' file if it's a symlink we're going to
1205            ### treat as a regular file
1206            $clone->_downgrade_to_plainfile if $link_ok;
1207
1208            ### get the header for this block
1209            my $header = $self->_format_tar_entry( $clone );
1210            unless( $header ) {
1211                $self->_error(q[Could not format header for: ] .
1212                                    $clone->full_path );
1213                return;
1214            }
1215
1216            unless( print $handle $header ) {
1217                $self->_error(q[Could not write header for: ] .
1218                                    $clone->full_path);
1219                return;
1220            }
1221
1222            if( $link_ok or $data_ok ) {
1223                unless( print $handle $clone->data ) {
1224                    $self->_error(q[Could not write data for: ] .
1225                                    $clone->full_path);
1226                    return;
1227                }
1228
1229                ### pad the end of the clone if required ###
1230                print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
1231            }
1232
1233        } ### done writing these entries
1234    }
1235
1236    ### write the end markers ###
1237    print $handle TAR_END x 2 or
1238            return $self->_error( qq[Could not write tar end markers] );
1239
1240    ### did you want it written to a file, or returned as a string? ###
1241    my $rv =  length($file) ? 1
1242                        : $HAS_PERLIO ? $dummy
1243                        : do { seek $handle, 0, 0; local $/; <$handle> };
1244
1245    ### make sure to close the handle;
1246    close $handle;
1247
1248    return $rv;
1249}
1250
1251sub _format_tar_entry {
1252    my $self        = shift;
1253    my $entry       = shift or return;
1254    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
1255    my $no_prefix   = shift || 0;
1256
1257    my $file    = $entry->name;
1258    my $prefix  = $entry->prefix; $prefix = '' unless defined $prefix;
1259
1260    ### remove the prefix from the file name
1261    ### not sure if this is still neeeded --kane
1262    ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1263    ### this for us. Even worse, this would break if we tried to add a file
1264    ### like x/x.
1265    #if( length $prefix ) {
1266    #    $file =~ s/^$match//;
1267    #}
1268
1269    $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1270                if length $ext_prefix;
1271
1272    ### not sure why this is... ###
1273    my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1274    substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1275
1276    my $f1 = "%06o"; my $f2  = "%11o";
1277
1278    ### this might be optimizable with a 'changed' flag in the file objects ###
1279    my $tar = pack (
1280                PACK,
1281                $file,
1282
1283                (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1284                (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1285
1286                "",  # checksum field - space padded a bit down
1287
1288                (map { $entry->$_() }                 qw[type linkname magic]),
1289
1290                $entry->version || TAR_VERSION,
1291
1292                (map { $entry->$_() }                 qw[uname gname]),
1293                (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1294
1295                ($no_prefix ? '' : $prefix)
1296    );
1297
1298    ### add the checksum ###
1299    substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1300
1301    return $tar;
1302}
1303
1304=head2 $tar->add_files( @filenamelist )
1305
1306Takes a list of filenames and adds them to the in-memory archive.
1307
1308The path to the file is automatically converted to a Unix like
1309equivalent for use in the archive, and, if on MacOS, the file's
1310modification time is converted from the MacOS epoch to the Unix epoch.
1311So tar archives created on MacOS with B<Archive::Tar> can be read
1312both with I<tar> on Unix and applications like I<suntar> or
1313I<Stuffit Expander> on MacOS.
1314
1315Be aware that the file's type/creator and resource fork will be lost,
1316which is usually what you want in cross-platform archives.
1317
1318Instead of a filename, you can also pass it an existing C<Archive::Tar::File>
1319object from, for example, another archive. The object will be clone, and
1320effectively be a copy of the original, not an alias.
1321
1322Returns a list of C<Archive::Tar::File> objects that were just added.
1323
1324=cut
1325
1326sub add_files {
1327    my $self    = shift;
1328    my @files   = @_ or return;
1329
1330    my @rv;
1331    for my $file ( @files ) {
1332
1333        ### you passed an Archive::Tar::File object
1334        ### clone it so we don't accidentally have a reference to
1335        ### an object from another archive
1336        if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) {
1337            push @rv, $file->clone;
1338            next;
1339        }
1340
1341        unless( -e $file || -l $file ) {
1342            $self->_error( qq[No such file: '$file'] );
1343            next;
1344        }
1345
1346        my $obj = Archive::Tar::File->new( file => $file );
1347        unless( $obj ) {
1348            $self->_error( qq[Unable to add file: '$file'] );
1349            next;
1350        }
1351
1352        push @rv, $obj;
1353    }
1354
1355    push @{$self->{_data}}, @rv;
1356
1357    return @rv;
1358}
1359
1360=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1361
1362Takes a filename, a scalar full of data and optionally a reference to
1363a hash with specific options.
1364
1365Will add a file to the in-memory archive, with name C<$filename> and
1366content C<$data>. Specific properties can be set using C<$opthashref>.
1367The following list of properties is supported: name, size, mtime
1368(last modified date), mode, uid, gid, linkname, uname, gname,
1369devmajor, devminor, prefix, type.  (On MacOS, the file's path and
1370modification times are converted to Unix equivalents.)
1371
1372Valid values for the file type are the following constants defined in
1373Archive::Tar::Constants:
1374
1375=over 4
1376
1377=item FILE
1378
1379Regular file.
1380
1381=item HARDLINK
1382
1383=item SYMLINK
1384
1385Hard and symbolic ("soft") links; linkname should specify target.
1386
1387=item CHARDEV
1388
1389=item BLOCKDEV
1390
1391Character and block devices. devmajor and devminor should specify the major
1392and minor device numbers.
1393
1394=item DIR
1395
1396Directory.
1397
1398=item FIFO
1399
1400FIFO (named pipe).
1401
1402=item SOCKET
1403
1404Socket.
1405
1406=back
1407
1408Returns the C<Archive::Tar::File> object that was just added, or
1409C<undef> on failure.
1410
1411=cut
1412
1413sub add_data {
1414    my $self    = shift;
1415    my ($file, $data, $opt) = @_;
1416
1417    my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1418    unless( $obj ) {
1419        $self->_error( qq[Unable to add file: '$file'] );
1420        return;
1421    }
1422
1423    push @{$self->{_data}}, $obj;
1424
1425    return $obj;
1426}
1427
1428=head2 $tar->error( [$BOOL] )
1429
1430Returns the current errorstring (usually, the last error reported).
1431If a true value was specified, it will give the C<Carp::longmess>
1432equivalent of the error, in effect giving you a stacktrace.
1433
1434For backwards compatibility, this error is also available as
1435C<$Archive::Tar::error> although it is much recommended you use the
1436method call instead.
1437
1438=cut
1439
1440{
1441    $error = '';
1442    my $longmess;
1443
1444    sub _error {
1445        my $self    = shift;
1446        my $msg     = $error = shift;
1447        $longmess   = Carp::longmess($error);
1448        if (ref $self) {
1449            $self->{_error} = $error;
1450            $self->{_longmess} = $longmess;
1451        }
1452
1453        ### set Archive::Tar::WARN to 0 to disable printing
1454        ### of errors
1455        if( $WARN ) {
1456            carp $DEBUG ? $longmess : $msg;
1457        }
1458
1459        return;
1460    }
1461
1462    sub error {
1463        my $self = shift;
1464        if (ref $self) {
1465            return shift() ? $self->{_longmess} : $self->{_error};
1466        } else {
1467            return shift() ? $longmess : $error;
1468        }
1469    }
1470}
1471
1472=head2 $tar->setcwd( $cwd );
1473
1474C<Archive::Tar> needs to know the current directory, and it will run
1475C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the
1476tarfile and saves it in the file system. (As of version 1.30, however,
1477C<Archive::Tar> will use the speed optimization described below
1478automatically, so it's only relevant if you're using C<extract_file()>).
1479
1480Since C<Archive::Tar> doesn't change the current directory internally
1481while it is extracting the items in a tarball, all calls to C<Cwd::cwd()>
1482can be avoided if we can guarantee that the current directory doesn't
1483get changed externally.
1484
1485To use this performance boost, set the current directory via
1486
1487    use Cwd;
1488    $tar->setcwd( cwd() );
1489
1490once before calling a function like C<extract_file> and
1491C<Archive::Tar> will use the current directory setting from then on
1492and won't call C<Cwd::cwd()> internally.
1493
1494To switch back to the default behaviour, use
1495
1496    $tar->setcwd( undef );
1497
1498and C<Archive::Tar> will call C<Cwd::cwd()> internally again.
1499
1500If you're using C<Archive::Tar>'s C<exract()> method, C<setcwd()> will
1501be called for you.
1502
1503=cut
1504
1505sub setcwd {
1506    my $self     = shift;
1507    my $cwd      = shift;
1508
1509    $self->{cwd} = $cwd;
1510}
1511
1512=head1 Class Methods
1513
1514=head2 Archive::Tar->create_archive($file, $compressed, @filelist)
1515
1516Creates a tar file from the list of files provided.  The first
1517argument can either be the name of the tar file to create or a
1518reference to an open file handle (e.g. a GLOB reference).
1519
1520The second argument is used to indicate compression. You can either
1521compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
1522to be the C<gzip> compression level (between 1 and 9), but the use of
1523constants is prefered:
1524
1525  # write a gzip compressed file
1526  Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist );
1527
1528  # write a bzip compressed file
1529  Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist );
1530
1531Note that when you pass in a filehandle, the compression argument
1532is ignored, as all files are printed verbatim to your filehandle.
1533If you wish to enable compression with filehandles, use an
1534C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
1535
1536The remaining arguments list the files to be included in the tar file.
1537These files must all exist. Any files which don't exist or can't be
1538read are silently ignored.
1539
1540If the archive creation fails for any reason, C<create_archive> will
1541return false. Please use the C<error> method to find the cause of the
1542failure.
1543
1544Note that this method does not write C<on the fly> as it were; it
1545still reads all the files into memory before writing out the archive.
1546Consult the FAQ below if this is a problem.
1547
1548=cut
1549
1550sub create_archive {
1551    my $class = shift;
1552
1553    my $file    = shift; return unless defined $file;
1554    my $gzip    = shift || 0;
1555    my @files   = @_;
1556
1557    unless( @files ) {
1558        return $class->_error( qq[Cowardly refusing to create empty archive!] );
1559    }
1560
1561    my $tar = $class->new;
1562    $tar->add_files( @files );
1563    return $tar->write( $file, $gzip );
1564}
1565
1566=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] )
1567
1568Returns an iterator function that reads the tar file without loading
1569it all in memory.  Each time the function is called it will return the
1570next file in the tarball. The files are returned as
1571C<Archive::Tar::File> objects. The iterator function returns the
1572empty list once it has exhausted the files contained.
1573
1574The second argument can be a hash reference with options, which are
1575identical to the arguments passed to C<read()>.
1576
1577Example usage:
1578
1579    my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} );
1580
1581    while( my $f = $next->() ) {
1582        print $f->name, "\n";
1583
1584        $f->extract or warn "Extraction failed";
1585
1586        # ....
1587    }
1588
1589=cut
1590
1591
1592sub iter {
1593    my $class       = shift;
1594    my $filename    = shift or return;
1595    my $compressed  = shift or 0;
1596    my $opts        = shift || {};
1597
1598    ### get a handle to read from.
1599    my $handle = $class->_get_handle(
1600        $filename,
1601        $compressed,
1602        READ_ONLY->( ZLIB )
1603    ) or return;
1604
1605    my @data;
1606    return sub {
1607        return shift(@data)     if @data;       # more than one file returned?
1608        return                  unless $handle; # handle exhausted?
1609
1610        ### read data, should only return file
1611        my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 });
1612        @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY';
1613
1614        ### return one piece of data
1615        return shift(@data)     if @data;
1616
1617        ### data is exhausted, free the filehandle
1618        undef $handle;
1619        return;
1620    };
1621}
1622
1623=head2 Archive::Tar->list_archive($file, $compressed, [\@properties])
1624
1625Returns a list of the names of all the files in the archive.  The
1626first argument can either be the name of the tar file to list or a
1627reference to an open file handle (e.g. a GLOB reference).
1628
1629If C<list_archive()> is passed an array reference as its third
1630argument it returns a list of hash references containing the requested
1631properties of each file.  The following list of properties is
1632supported: full_path, name, size, mtime (last modified date), mode,
1633uid, gid, linkname, uname, gname, devmajor, devminor, prefix.
1634
1635See C<Archive::Tar::File> for details about supported properties.
1636
1637Passing an array reference containing only one element, 'name', is
1638special cased to return a list of names rather than a list of hash
1639references.
1640
1641=cut
1642
1643sub list_archive {
1644    my $class   = shift;
1645    my $file    = shift; return unless defined $file;
1646    my $gzip    = shift || 0;
1647
1648    my $tar = $class->new($file, $gzip);
1649    return unless $tar;
1650
1651    return $tar->list_files( @_ );
1652}
1653
1654=head2 Archive::Tar->extract_archive($file, $compressed)
1655
1656Extracts the contents of the tar file.  The first argument can either
1657be the name of the tar file to create or a reference to an open file
1658handle (e.g. a GLOB reference).  All relative paths in the tar file will
1659be created underneath the current working directory.
1660
1661C<extract_archive> will return a list of files it extracted.
1662If the archive extraction fails for any reason, C<extract_archive>
1663will return false.  Please use the C<error> method to find the cause
1664of the failure.
1665
1666=cut
1667
1668sub extract_archive {
1669    my $class   = shift;
1670    my $file    = shift; return unless defined $file;
1671    my $gzip    = shift || 0;
1672
1673    my $tar = $class->new( ) or return;
1674
1675    return $tar->read( $file, $gzip, { extract => 1 } );
1676}
1677
1678=head2 $bool = Archive::Tar->has_io_string
1679
1680Returns true if we currently have C<IO::String> support loaded.
1681
1682Either C<IO::String> or C<perlio> support is needed to support writing
1683stringified archives. Currently, C<perlio> is the preferred method, if
1684available.
1685
1686See the C<GLOBAL VARIABLES> section to see how to change this preference.
1687
1688=cut
1689
1690sub has_io_string { return $HAS_IO_STRING; }
1691
1692=head2 $bool = Archive::Tar->has_perlio
1693
1694Returns true if we currently have C<perlio> support loaded.
1695
1696This requires C<perl-5.8> or higher, compiled with C<perlio>
1697
1698Either C<IO::String> or C<perlio> support is needed to support writing
1699stringified archives. Currently, C<perlio> is the preferred method, if
1700available.
1701
1702See the C<GLOBAL VARIABLES> section to see how to change this preference.
1703
1704=cut
1705
1706sub has_perlio { return $HAS_PERLIO; }
1707
1708=head2 $bool = Archive::Tar->has_zlib_support
1709
1710Returns true if C<Archive::Tar> can extract C<zlib> compressed archives
1711
1712=cut
1713
1714sub has_zlib_support { return ZLIB }
1715
1716=head2 $bool = Archive::Tar->has_bzip2_support
1717
1718Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives
1719
1720=cut
1721
1722sub has_bzip2_support { return BZIP }
1723
1724=head2 Archive::Tar->can_handle_compressed_files
1725
1726A simple checking routine, which will return true if C<Archive::Tar>
1727is able to uncompress compressed archives on the fly with C<IO::Zlib>
1728and C<IO::Compress::Bzip2> or false if not both are installed.
1729
1730You can use this as a shortcut to determine whether C<Archive::Tar>
1731will do what you think before passing compressed archives to its
1732C<read> method.
1733
1734=cut
1735
1736sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 }
1737
1738sub no_string_support {
1739    croak("You have to install IO::String to support writing archives to strings");
1740}
1741
17421;
1743
1744__END__
1745
1746=head1 GLOBAL VARIABLES
1747
1748=head2 $Archive::Tar::FOLLOW_SYMLINK
1749
1750Set this variable to C<1> to make C<Archive::Tar> effectively make a
1751copy of the file when extracting. Default is C<0>, which
1752means the symlink stays intact. Of course, you will have to pack the
1753file linked to as well.
1754
1755This option is checked when you write out the tarfile using C<write>
1756or C<create_archive>.
1757
1758This works just like C</bin/tar>'s C<-h> option.
1759
1760=head2 $Archive::Tar::CHOWN
1761
1762By default, C<Archive::Tar> will try to C<chown> your files if it is
1763able to. In some cases, this may not be desired. In that case, set
1764this variable to C<0> to disable C<chown>-ing, even if it were
1765possible.
1766
1767The default is C<1>.
1768
1769=head2 $Archive::Tar::CHMOD
1770
1771By default, C<Archive::Tar> will try to C<chmod> your files to
1772whatever mode was specified for the particular file in the archive.
1773In some cases, this may not be desired. In that case, set this
1774variable to C<0> to disable C<chmod>-ing.
1775
1776The default is C<1>.
1777
1778=head2 $Archive::Tar::SAME_PERMISSIONS
1779
1780When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether
1781the permissions on files from the archive are used without modification
1782of if they are filtered by removing any setid bits and applying the
1783current umask.
1784
1785The default is C<1> for the root user and C<0> for normal users.
1786
1787=head2 $Archive::Tar::DO_NOT_USE_PREFIX
1788
1789By default, C<Archive::Tar> will try to put paths that are over
1790100 characters in the C<prefix> field of your tar header, as
1791defined per POSIX-standard. However, some (older) tar programs
1792do not implement this spec. To retain compatibility with these older
1793or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX>
1794variable to a true value, and C<Archive::Tar> will use an alternate
1795way of dealing with paths over 100 characters by using the
1796C<GNU Extended Header> feature.
1797
1798Note that clients who do not support the C<GNU Extended Header>
1799feature will not be able to read these archives. Such clients include
1800tars on C<Solaris>, C<Irix> and C<AIX>.
1801
1802The default is C<0>.
1803
1804=head2 $Archive::Tar::DEBUG
1805
1806Set this variable to C<1> to always get the C<Carp::longmess> output
1807of the warnings, instead of the regular C<carp>. This is the same
1808message you would get by doing:
1809
1810    $tar->error(1);
1811
1812Defaults to C<0>.
1813
1814=head2 $Archive::Tar::WARN
1815
1816Set this variable to C<0> if you do not want any warnings printed.
1817Personally I recommend against doing this, but people asked for the
1818option. Also, be advised that this is of course not threadsafe.
1819
1820Defaults to C<1>.
1821
1822=head2 $Archive::Tar::error
1823
1824Holds the last reported error. Kept for historical reasons, but its
1825use is very much discouraged. Use the C<error()> method instead:
1826
1827    warn $tar->error unless $tar->extract;
1828
1829Note that in older versions of this module, the C<error()> method
1830would return an effectively global value even when called an instance
1831method as above. This has since been fixed, and multiple instances of
1832C<Archive::Tar> now have separate error strings.
1833
1834=head2 $Archive::Tar::INSECURE_EXTRACT_MODE
1835
1836This variable indicates whether C<Archive::Tar> should allow
1837files to be extracted outside their current working directory.
1838
1839Allowing this could have security implications, as a malicious
1840tar archive could alter or replace any file the extracting user
1841has permissions to. Therefor, the default is to not allow
1842insecure extractions.
1843
1844If you trust the archive, or have other reasons to allow the
1845archive to write files outside your current working directory,
1846set this variable to C<true>.
1847
1848Note that this is a backwards incompatible change from version
1849C<1.36> and before.
1850
1851=head2 $Archive::Tar::HAS_PERLIO
1852
1853This variable holds a boolean indicating if we currently have
1854C<perlio> support loaded. This will be enabled for any perl
1855greater than C<5.8> compiled with C<perlio>.
1856
1857If you feel strongly about disabling it, set this variable to
1858C<false>. Note that you will then need C<IO::String> installed
1859to support writing stringified archives.
1860
1861Don't change this variable unless you B<really> know what you're
1862doing.
1863
1864=head2 $Archive::Tar::HAS_IO_STRING
1865
1866This variable holds a boolean indicating if we currently have
1867C<IO::String> support loaded. This will be enabled for any perl
1868that has a loadable C<IO::String> module.
1869
1870If you feel strongly about disabling it, set this variable to
1871C<false>. Note that you will then need C<perlio> support from
1872your perl to be able to  write stringified archives.
1873
1874Don't change this variable unless you B<really> know what you're
1875doing.
1876
1877=head1 FAQ
1878
1879=over 4
1880
1881=item What's the minimum perl version required to run Archive::Tar?
1882
1883You will need perl version 5.005_03 or newer.
1884
1885=item Isn't Archive::Tar slow?
1886
1887Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
1888However, it's very portable. If speed is an issue, consider using
1889C</bin/tar> instead.
1890
1891=item Isn't Archive::Tar heavier on memory than /bin/tar?
1892
1893Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
1894C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
1895choice but to read the archive into memory.
1896This is ok if you want to do in-memory manipulation of the archive.
1897
1898If you just want to extract, use the C<extract_archive> class method
1899instead. It will optimize and write to disk immediately.
1900
1901Another option is to use the C<iter> class method to iterate over
1902the files in the tarball without reading them all in memory at once.
1903
1904=item Can you lazy-load data instead?
1905
1906In some cases, yes. You can use the C<iter> class method to iterate
1907over the files in the tarball without reading them all in memory at once.
1908
1909=item How much memory will an X kb tar file need?
1910
1911Probably more than X kb, since it will all be read into memory. If
1912this is a problem, and you don't need to do in memory manipulation
1913of the archive, consider using the C<iter> class method, or C</bin/tar>
1914instead.
1915
1916=item What do you do with unsupported filetypes in an archive?
1917
1918C<Unix> has a few filetypes that aren't supported on other platforms,
1919like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
1920try to make a copy of the original file, rather than throwing an error.
1921
1922This does require you to read the entire archive in to memory first,
1923since otherwise we wouldn't know what data to fill the copy with.
1924(This means that you cannot use the class methods, including C<iter>
1925on archives that have incompatible filetypes and still expect things
1926to work).
1927
1928For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
1929the extraction of this particular item didn't work.
1930
1931=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly!
1932
1933By default, C<Archive::Tar> is in a completely POSIX-compatible
1934mode, which uses the POSIX-specification of C<tar> to store files.
1935For paths greather than 100 characters, this is done using the
1936C<POSIX header prefix>. Non-POSIX-compatible clients may not support
1937this part of the specification, and may only support the C<GNU Extended
1938Header> functionality. To facilitate those clients, you can set the
1939C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the
1940C<GLOBAL VARIABLES> section for details on this variable.
1941
1942Note that GNU tar earlier than version 1.14 does not cope well with
1943the C<POSIX header prefix>. If you use such a version, consider setting
1944the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>.
1945
1946=item How do I extract only files that have property X from an archive?
1947
1948Sometimes, you might not wish to extract a complete archive, just
1949the files that are relevant to you, based on some criteria.
1950
1951You can do this by filtering a list of C<Archive::Tar::File> objects
1952based on your criteria. For example, to extract only files that have
1953the string C<foo> in their title, you would use:
1954
1955    $tar->extract(
1956        grep { $_->full_path =~ /foo/ } $tar->get_files
1957    );
1958
1959This way, you can filter on any attribute of the files in the archive.
1960Consult the C<Archive::Tar::File> documentation on how to use these
1961objects.
1962
1963=item How do I access .tar.Z files?
1964
1965The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
1966the C<IO::Zlib> module) to access tar files that have been compressed
1967with C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
1968utility cannot be read by C<Compress::Zlib> and so cannot be directly
1969accesses by C<Archive::Tar>.
1970
1971If the C<uncompress> or C<gunzip> programs are available, you can use
1972one of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
1973
1974Firstly with C<uncompress>
1975
1976    use Archive::Tar;
1977
1978    open F, "uncompress -c $filename |";
1979    my $tar = Archive::Tar->new(*F);
1980    ...
1981
1982and this with C<gunzip>
1983
1984    use Archive::Tar;
1985
1986    open F, "gunzip -c $filename |";
1987    my $tar = Archive::Tar->new(*F);
1988    ...
1989
1990Similarly, if the C<compress> program is available, you can use this to
1991write a C<.tar.Z> file
1992
1993    use Archive::Tar;
1994    use IO::File;
1995
1996    my $fh = new IO::File "| compress -c >$filename";
1997    my $tar = Archive::Tar->new();
1998    ...
1999    $tar->write($fh);
2000    $fh->close ;
2001
2002=item How do I handle Unicode strings?
2003
2004C<Archive::Tar> uses byte semantics for any files it reads from or writes
2005to disk. This is not a problem if you only deal with files and never
2006look at their content or work solely with byte strings. But if you use
2007Unicode strings with character semantics, some additional steps need
2008to be taken.
2009
2010For example, if you add a Unicode string like
2011
2012    # Problem
2013    $tar->add_data('file.txt', "Euro: \x{20AC}");
2014
2015then there will be a problem later when the tarfile gets written out
2016to disk via C<$tar->write()>:
2017
2018    Wide character in print at .../Archive/Tar.pm line 1014.
2019
2020The data was added as a Unicode string and when writing it out to disk,
2021the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
2022tried to convert the string to ISO-8859 and failed. The written file
2023now contains garbage.
2024
2025For this reason, Unicode strings need to be converted to UTF-8-encoded
2026bytestrings before they are handed off to C<add_data()>:
2027
2028    use Encode;
2029    my $data = "Accented character: \x{20AC}";
2030    $data = encode('utf8', $data);
2031
2032    $tar->add_data('file.txt', $data);
2033
2034A opposite problem occurs if you extract a UTF8-encoded file from a
2035tarball. Using C<get_content()> on the C<Archive::Tar::File> object
2036will return its content as a bytestring, not as a Unicode string.
2037
2038If you want it to be a Unicode string (because you want character
2039semantics with operations like regular expression matching), you need
2040to decode the UTF8-encoded content and have Perl convert it into
2041a Unicode string:
2042
2043    use Encode;
2044    my $data = $tar->get_content();
2045
2046    # Make it a Unicode string
2047    $data = decode('utf8', $data);
2048
2049There is no easy way to provide this functionality in C<Archive::Tar>,
2050because a tarball can contain many files, and each of which could be
2051encoded in a different way.
2052
2053=back
2054
2055=head1 CAVEATS
2056
2057The AIX tar does not fill all unused space in the tar archive with 0x00.
2058This sometimes leads to warning messages from C<Archive::Tar>.
2059
2060  Invalid header block at offset nnn
2061
2062A fix for that problem is scheduled to be released in the following levels
2063of AIX, all of which should be coming out in the 4th quarter of 2009:
2064
2065 AIX 5.3 TL7 SP10
2066 AIX 5.3 TL8 SP8
2067 AIX 5.3 TL9 SP5
2068 AIX 5.3 TL10 SP2
2069
2070 AIX 6.1 TL0 SP11
2071 AIX 6.1 TL1 SP7
2072 AIX 6.1 TL2 SP6
2073 AIX 6.1 TL3 SP3
2074
2075The IBM APAR number for this problem is IZ50240 (Reported component ID:
20765765G0300 / AIX 5.3). It is possible to get an ifix for that problem.
2077If you need an ifix please contact your local IBM AIX support.
2078
2079=head1 TODO
2080
2081=over 4
2082
2083=item Check if passed in handles are open for read/write
2084
2085Currently I don't know of any portable pure perl way to do this.
2086Suggestions welcome.
2087
2088=item Allow archives to be passed in as string
2089
2090Currently, we only allow opened filehandles or filenames, but
2091not strings. The internals would need some reworking to facilitate
2092stringified archives.
2093
2094=item Facilitate processing an opened filehandle of a compressed archive
2095
2096Currently, we only support this if the filehandle is an IO::Zlib object.
2097Environments, like apache, will present you with an opened filehandle
2098to an uploaded file, which might be a compressed archive.
2099
2100=back
2101
2102=head1 SEE ALSO
2103
2104=over 4
2105
2106=item The GNU tar specification
2107
2108C<http://www.gnu.org/software/tar/manual/tar.html>
2109
2110=item The PAX format specication
2111
2112The specifcation which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
2113
2114=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html>
2115
2116=item GNU tar intends to switch to POSIX compatibility
2117
2118GNU Tar authors have expressed their intention to become completely
2119POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html>
2120
2121=item A Comparison between various tar implementations
2122
2123Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs>
2124
2125=back
2126
2127=head1 AUTHOR
2128
2129This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
2130
2131Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>.
2132
2133=head1 ACKNOWLEDGEMENTS
2134
2135Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas,
2136Rainer Tammer and especially Andrew Savige for their help and suggestions.
2137
2138=head1 COPYRIGHT
2139
2140This module is copyright (c) 2002 - 2009 Jos Boumans
2141E<lt>kane@cpan.orgE<gt>. All rights reserved.
2142
2143This library is free software; you may redistribute and/or modify
2144it under the same terms as Perl itself.
2145
2146=cut
2147