xref: /openbsd-src/gnu/usr.bin/perl/cpan/IO-Compress/lib/IO/Uncompress/Unzip.pm (revision 53555c846a0a6f917dbd0a191f826da995ab1c42)
1package IO::Uncompress::Unzip;
2
3require 5.006 ;
4
# Reader for the zip container format (RFC1952, previously cited here, is the gzip spec)
6
7use strict ;
8use warnings;
9use bytes;
10
11use IO::File;
12use IO::Uncompress::RawInflate  2.204 ;
13use IO::Compress::Base::Common  2.204 qw(:Status );
14use IO::Uncompress::Adapter::Inflate  2.204 ;
15use IO::Uncompress::Adapter::Identity 2.204 ;
16use IO::Compress::Zlib::Extra 2.204 ;
17use IO::Compress::Zip::Constants 2.204 ;
18
19use Compress::Raw::Zlib  2.204 () ;
20
21BEGIN
22{
23   # Don't trigger any __DIE__ Hooks.
24   local $SIG{__DIE__};
25
26    eval{ require IO::Uncompress::Adapter::Bunzip2 ;
27          IO::Uncompress::Adapter::Bunzip2->import() } ;
28    eval{ require IO::Uncompress::Adapter::UnLzma ;
29          IO::Uncompress::Adapter::UnLzma->import() } ;
30    eval{ require IO::Uncompress::Adapter::UnXz ;
31          IO::Uncompress::Adapter::UnXz->import() } ;
32    eval{ require IO::Uncompress::Adapter::UnZstd ;
33          IO::Uncompress::Adapter::UnZstd->import() } ;
34}
35
36
37require Exporter ;
38
39our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup);
40
41$VERSION = '2.204';
42$UnzipError = '';
43
44@ISA    = qw(IO::Uncompress::RawInflate Exporter);
45@EXPORT_OK = qw($UnzipError unzip );
46%EXPORT_TAGS = %IO::Uncompress::RawInflate::EXPORT_TAGS ;
47push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
48Exporter::export_ok_tags('all');
49
# Dispatch table keyed by record signature. chkTrailer looks up the 4-byte
# signature that follows a member's data and calls the matching routine to
# consume (skip over) that record.
# NOTE(review): no skipDigitalSignature sub is visible in this file - confirm
# it is defined elsewhere before relying on that entry.
%headerLookup = (
    ZIP_CENTRAL_HDR_SIG()           => \&skipCentralDirectory,
    ZIP_END_CENTRAL_HDR_SIG()       => \&skipEndCentralDirectory,
    ZIP64_END_CENTRAL_REC_HDR_SIG() => \&skipCentralDirectory64Rec,
    ZIP64_END_CENTRAL_LOC_HDR_SIG() => \&skipCentralDirectory64Loc,
    ZIP64_ARCHIVE_EXTRA_SIG()       => \&skipArchiveExtra,
    ZIP64_DIGITAL_SIGNATURE_SIG()   => \&skipDigitalSignature,
);

# Human-readable names for the compression method IDs this module knows;
# used to fill in the 'MethodName' field of the header info hash.
my %MethodNames = (
    ZIP_CM_STORE()   => 'Stored',
    ZIP_CM_DEFLATE() => 'Deflated',
    ZIP_CM_BZIP2()   => 'Bzip2',
    ZIP_CM_LZMA()    => 'Lzma',
    ZIP_CM_XZ()      => 'Xz',
    ZIP_CM_ZSTD()    => 'Zstd',
);
67
# Constructor.
#
# Creates a self-tied object of the requested class (errors are reported
# through $UnzipError) and hands the remaining arguments to _create.
# Returns whatever _create returns.
sub new
{
    my $class = shift ;

    my $self = IO::Compress::Base::Common::createSelfTiedObject($class,
                                                                \$UnzipError);

    return $self->_create(undef, 0, @_);
}
74
# Functional interface: unzip $input => $output [, OPTS].
#
# Builds a throw-away self-tied object (no class given) and delegates all
# caller arguments, unchanged, to _inf. Returns the result of _inf.
sub unzip
{
    my $worker = IO::Compress::Base::Common::createSelfTiedObject(undef,
                                                                  \$UnzipError);

    return $worker->_inf(@_) ;
}
80
# Returns the option specifications that are specific to this class, as a
# flat list of  name => [ parse-type, default ]  pairs:
#   name   - any value, default undef (read back by ckParams)
#   stream - boolean, default 0
#   efs    - boolean, default 0 (read back by ckParams)
sub getExtraParams
{
    my $any_type  = IO::Compress::Base::Common::Parse_any();
    my $bool_type = IO::Compress::Base::Common::Parse_boolean();

    # TODO - This means reading the central directory to get
    # 1. the local header offsets
    # 2. The compressed data length

    return (
            # Zip header fields
            'name'    => [$any_type,  undef],

            'stream'  => [$bool_type, 0],
            'efs'     => [$bool_type, 0],
        );
}
96
# Post-processes the parsed options.
#
# Forces the 'crc32' option on (unzip always needs crc32) and copies the
# 'name' and 'efs' option values into the object's UnzipData slot.
# Always returns 1.
sub ckParams
{
    my ($self, $got) = @_ ;

    # unzip always needs crc32
    $got->setValue('crc32' => 1);

    for my $mapping ( [ Name => 'name' ], [ efs => 'efs' ] )
    {
        my ($slot, $option) = @$mapping ;
        *$self->{UnzipData}{$slot} = $got->getValue($option);
    }

    return 1;
}
110
# Checks the leading magic number and reads the first local header.
#
# Returns 0 when ckMagic fails, undef when readHeader yields nothing, and 1
# on success. The readHeader result is stored in the object's Info slot in
# either case.
sub mkUncomp
{
    my ($self) = @_ ;

    my $magic = $self->ckMagic();
    return 0
        if ! $magic ;

    my $info = $self->readHeader($magic);
    *$self->{Info} = $info ;

    return undef
        if ! $info ;

    return 1;
}
125
# Reads the first 4 bytes of the input and verifies they are a zip local
# header signature.
#
# The bytes read are remembered in HeaderPending. On success the object's
# Type is set to 'zip' and the 4 signature bytes are returned; otherwise the
# result of HeaderError is returned.
sub ckMagic
{
    my ($self) = @_ ;

    my $signature ;
    $self->smartReadExact(\$signature, 4);

    *$self->{HeaderPending} = $signature ;

    if (length($signature) != 4)
    {
        return $self->HeaderError("Minimum header size is " .
                                  4 . " bytes") ;
    }

    if (! _isZipMagic($signature))
    {
        return $self->HeaderError("Bad Magic") ;
    }

    *$self->{Type} = 'zip';

    return $signature ;
}
146
147
# Skips $offset bytes of input by reading and discarding them.
#
# Data is consumed in chunks of at most 16 KiB; the final chunk is clamped
# to the number of bytes still to skip.
#
# Returns 1 once everything has been skipped, or 0 as soon as a read fails.
sub fastForward
{
    my $self = shift;
    my $offset = shift;

    # TODO - if Stream isn't enabled & reading from file, use seek

    my $buffer = '';
    my $chunk = 1024 * 16;

    while ($offset > 0)
    {
        # Clamp to the bytes remaining. This used to compare against
        # "length $offset" (the number of digits in the value), which shrank
        # the chunk to a handful of bytes and made large skips very slow.
        $chunk = $offset
            if $offset < $chunk ;

        $offset -= $chunk;

        $self->smartReadExact(\$buffer, $chunk)
            or return 0;
    }

    return 1;
}
171
172
# Reads local headers until the wanted member is found.
#
# $magic is the 4-byte signature already consumed for the first header.
# When no member name was requested (UnzipData/Name undefined) the first
# header is returned as-is. Otherwise each non-matching member's data and
# trailer are skipped and the next header is read.
#
# Returns the header info hash, or undef (with an error string saved for
# truncation / "Cannot find" conditions).
sub readHeader
{
    my ($self, $magic) = @_ ;

    my $wanted = *$self->{UnzipData}{Name} ;
    my $entry  = $self->_readZipHeader($magic) ;

    while (defined $entry)
    {
        return $entry
            if ! defined $wanted || $entry->{Name} eq $wanted ;

        # skip the data
        # TODO - when Stream is off, use seek
        my $trailer;
        if (! *$self->{ZipData}{Streaming}) {
            # Size is known up front: just discard that many bytes.
            my $length = $entry->{CompressedLength}->get64bit();
            $self->fastForward($length)
                or return $self->saveErrorString(undef, "Truncated file");
            $trailer = '';
        }
        else {
            # Size unknown: run the data through the uncompressor until it
            # reports end-of-stream, pushing back whatever input it leaves.
            while (1) {

                my $chunk;
                my $status = $self->smartRead(\$chunk, 1024 * 16);

                return $self->saveErrorString(undef, "Truncated file")
                    if $status <= 0 ;

                my $discard ;
                my $out;

                $status = *$self->{Uncomp}->uncompr(\$chunk, \$discard, 0, $out);

                return $self->saveErrorString(undef, *$self->{Uncomp}{Error},
                                                     *$self->{Uncomp}{ErrorNo})
                    if $self->saveStatus($status) == STATUS_ERROR;

                $self->pushBack($chunk)  ;

                if ($status == STATUS_ENDSTREAM) {
                    *$self->{Uncomp}->reset();
                    last;
                }
            }

            # skip the trailer
            $self->smartReadExact(\$trailer, $entry->{TrailerLength})
                or return $self->saveErrorString(undef, "Truncated file");
        }

        $self->chkTrailer($trailer) == STATUS_OK
            or return $self->saveErrorString(undef, "Truncated file");

        $entry = $self->_readFullZipHeader();

        return $self->saveErrorString(undef, "Cannot find '$wanted'")
            if $self->smartEof();
    }

    return undef;
}
239
# Validates the trailer of the member just read and consumes any archive
# bookkeeping records that follow it.
#
# In streaming mode $trailer is the data descriptor (signature, CRC32 and
# 32- or 64-bit sizes); otherwise the values saved from the local header
# are used. The sizes are recorded in Info, and in Strict mode the CRC and
# sizes are compared with what was actually processed.
#
# Afterwards 4-byte signatures are read in a loop and dispatched through
# %headerLookup. Returns STATUS_OK when the next local header or the end of
# central directory record is reached, STATUS_EOF at end of input, and
# STATUS_ERROR (or a TrailerError result) otherwise.
sub chkTrailer
{
    my ($self, $trailer) = @_ ;

    my ($sig, $CRC32, $cSize, $uSize) ;

    if (! *$self->{ZipData}{Streaming}) {
        ($CRC32, $cSize, $uSize) =
            (*$self->{ZipData}{Crc32},
             *$self->{ZipData}{CompressedLen},
             *$self->{ZipData}{UnCompressedLen});
    }
    else {
        $sig   = unpack ("V", substr($trailer, 0, 4));
        $CRC32 = unpack ("V", substr($trailer, 4, 4));

        if (*$self->{ZipData}{Zip64} ) {
            $cSize = U64::newUnpack_V64(substr($trailer,  8, 8));
            $uSize = U64::newUnpack_V64(substr($trailer, 16, 8));
        }
        else {
            $cSize = U64::newUnpack_V32(substr($trailer,  8, 4));
            $uSize = U64::newUnpack_V32(substr($trailer, 12, 4));
        }

        return $self->TrailerError("Data Descriptor signature, got $sig")
            if $sig != ZIP_DATA_HDR_SIG;
    }

    *$self->{Info}{CRC32} = *$self->{ZipData}{CRC32} ;
    *$self->{Info}{CompressedLength} = $cSize->get64bit();
    *$self->{Info}{UncompressedLength} = $uSize->get64bit();

    if (*$self->{Strict}) {
        return $self->TrailerError("CRC mismatch")
            if $CRC32  != *$self->{ZipData}{CRC32} ;

        return $self->TrailerError("CSIZE mismatch.")
            if ! $cSize->equal(*$self->{CompSize});

        return $self->TrailerError("USIZE mismatch.")
            if ! $uSize->equal(*$self->{UnCompSize});
    }

    # check for central directory or end of central directory
    while (1)
    {
        my $magic ;
        my $got = $self->smartRead(\$magic, 4);

        return $self->saveErrorString(STATUS_ERROR, "Truncated file")
            if $got != 4 && *$self->{Strict};

        if ($got == 0) {
            return STATUS_EOF ;
        }
        if ($got < 0) {
            return STATUS_ERROR ;
        }
        if ($got < 4) {
            # Short read: hand the fragment back and let the caller carry on.
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }

        my $recordSig = unpack("V", $magic) ;
        my $skipper   = $headerLookup{$recordSig} ;

        if (! $skipper)
        {
            # Not a bookkeeping record: put the data back. A local header
            # means another member follows; anything else is an error.
            $self->pushBack($magic)  ;

            if ($recordSig == ZIP_LOCAL_HDR_SIG) {
                return STATUS_OK ;
            }

            return STATUS_ERROR ;
        }

        if ($skipper->($self, $magic) != STATUS_OK ) {
            if (*$self->{Strict}) {
                return STATUS_ERROR ;
            }

            $self->clearError();
            return STATUS_OK ;
        }

        if ($recordSig == ZIP_END_CENTRAL_HDR_SIG)
        {
            return STATUS_OK ;
        }
    }
}
342
# Consumes one central directory file header.
#
# $magic is the 4-byte signature already read. The remaining 42 bytes of
# the fixed part are read and, together with $magic, stored in
# HeaderPending. The filename, extra field and comment are then read and
# discarded according to the lengths found at offsets 28, 30 and 32 of the
# record.
#
# Returns STATUS_OK, or the result of TrailerError / TruncatedTrailer when
# the input runs out.
sub skipCentralDirectory
{
    my ($self, $magic) = @_ ;

    my $fixed;
    $self->smartReadExact(\$fixed, 46 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     46 . " bytes") ;

    # Only the fixed 46-byte portion is recorded as pending header data.
    *$self->{HeaderPending} = $magic . $fixed ;

    # Three consecutive 16-bit little-endian lengths at record offset 28.
    my ($filename_length, $extra_length, $comment_length)
        = unpack("v v v", substr($fixed, 28-4, 6));

    my @variable_parts = (
        [ $filename_length, "filename" ],
        [ $extra_length,    "extra"    ],
        [ $comment_length,  "comment"  ],
    );

    for my $part (@variable_parts)
    {
        my ($length, $label) = @$part ;

        next
            if ! $length ;

        my $discard;
        $self->smartReadExact(\$discard, $length)
            or return $self->TruncatedTrailer($label);
    }

    return STATUS_OK ;
}
399
# Consumes an archive extra data record.
#
# After the signature ($magic, already read) comes a 32-bit little-endian
# length followed by that many bytes of payload. The whole record
# (signature, length and payload) is stored in HeaderPending.
#
# Returns STATUS_OK, or the result of TrailerError when the input is short.
sub skipArchiveExtra
{
    my ($self, $magic) = @_ ;

    my $lengthField;
    $self->smartReadExact(\$lengthField, 4)
        or return $self->TrailerError("Minimum header size is " .
                                     4 . " bytes") ;

    my $size = unpack ("V", $lengthField);

    my $payload;
    $self->smartReadExact(\$payload, $size)
        or return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;

    *$self->{HeaderPending} = $magic . $lengthField . $payload ;

    return STATUS_OK ;
}
423
424
# Consumes a Zip64 end of central directory record.
#
# After the signature ($magic, already read) comes a 64-bit little-endian
# "size of the remainder of this record" field; that many bytes are then
# skipped with fastForward.
#
# The size is decoded with U64::Value_VV64, the same helper offsetFromZip64
# uses for this field, rather than by multiplying the high word by
# U64::MAX32.
# NOTE(review): this assumes MAX32 is 0xFFFFFFFF rather than 2**32, so the
# old form was off by the high word when it is non-zero - confirm against
# U64.
#
# Returns STATUS_OK, or the result of TrailerError when the input is short.
sub skipCentralDirectory64Rec
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 8)
        or return $self->TrailerError("Minimum header size is " .
                                     8 . " bytes") ;

    my $size = U64::Value_VV64($buffer);

    $self->fastForward($size)
        or return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;

    # Layout of the skipped remainder, for reference:
   #my $versionMadeBy      = unpack ("v",   substr($buffer,  0, 2));
   #my $extractVersion     = unpack ("v",   substr($buffer,  2, 2));
   #my $diskNumber         = unpack ("V",   substr($buffer,  4, 4));
   #my $cntrlDirDiskNo     = unpack ("V",   substr($buffer,  8, 4));
   #my $entriesInThisCD    = unpack ("V V", substr($buffer, 12, 8));
   #my $entriesInCD        = unpack ("V V", substr($buffer, 20, 8));
   #my $sizeOfCD           = unpack ("V V", substr($buffer, 28, 8));
   #my $offsetToCD         = unpack ("V V", substr($buffer, 36, 8));

    return STATUS_OK ;
}
458
# Consumes a Zip64 end of central directory locator.
#
# The record is a fixed 20 bytes; the 16 bytes after the signature ($magic,
# already read) are read and the whole record is stored in HeaderPending.
#
# Returns STATUS_OK, or the result of TrailerError when the input is short.
sub skipCentralDirectory64Loc
{
    my ($self, $magic) = @_ ;

    my $rest;
    $self->smartReadExact(\$rest, 20 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     20 . " bytes") ;

    *$self->{HeaderPending} = $magic . $rest ;

    # Layout of the bytes just read, for reference:
   #my $startCdDisk        = unpack ("V",   substr($rest,  4-4, 4));
   #my $offsetToCD         = unpack ("V V", substr($rest,  8-4, 8));
   #my $diskCount          = unpack ("V",   substr($rest, 16-4, 4));

    return STATUS_OK ;
}
478
# Consumes the end of central directory record.
#
# The 18 bytes following the signature ($magic, already read) are read and,
# with $magic, stored in HeaderPending. The trailing archive comment, whose
# 16-bit length sits at record offset 20, is then read and discarded.
#
# Returns STATUS_OK, or the result of TrailerError / TruncatedTrailer when
# the input is short.
sub skipEndCentralDirectory
{
    my ($self, $magic) = @_ ;

    my $fixed;
    $self->smartReadExact(\$fixed, 22 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     22 . " bytes") ;

    # Only the fixed 22-byte portion is recorded as pending header data.
    *$self->{HeaderPending} = $magic . $fixed ;

    # Layout of the fixed part, for reference:
   #my $diskNumber         = unpack ("v", substr($fixed, 4-4,  2));
   #my $cntrlDirDiskNo     = unpack ("v", substr($fixed, 6-4,  2));
   #my $entriesInThisCD    = unpack ("v", substr($fixed, 8-4,  2));
   #my $entriesInCD        = unpack ("v", substr($fixed, 10-4, 2));
   #my $sizeOfCD           = unpack ("V", substr($fixed, 12-4, 4));
   #my $offsetToCD         = unpack ("V", substr($fixed, 16-4, 4));
    my $comment_length     = unpack ("v", substr($fixed, 20-4, 2));

    return STATUS_OK
        if ! $comment_length ;

    my $comment ;
    $self->smartReadExact(\$comment, $comment_length)
        or return $self->TruncatedTrailer("comment");

    return STATUS_OK ;
}
512
513
# Tests whether $buffer starts with the zip local file header signature.
#
# Returns 0 when fewer than 4 bytes are supplied; otherwise the boolean
# result of comparing the first 4 bytes, read as a little-endian 32-bit
# value, with ZIP_LOCAL_HDR_SIG.
sub _isZipMagic
{
    my ($candidate) = @_ ;

    if (length($candidate) < 4)
    {
        return 0 ;
    }

    return unpack("V", $candidate) == ZIP_LOCAL_HDR_SIG ;
}
521
522
# Reads a complete local file header, signature included.
#
# The 4-byte signature is read (and remembered in HeaderPending), checked
# with _isZipMagic, and the rest of the header is then parsed by
# _readZipHeader. When that parse yields undef the object's Transparent
# flag is removed.
#
# Returns the header info hash, undef, or the result of HeaderError.
sub _readFullZipHeader($)
{
    my ($self) = @_ ;
    my $signature = '' ;

    $self->smartReadExact(\$signature, 4);

    *$self->{HeaderPending} = $signature ;

    if (length($signature) != 4)
    {
        return $self->HeaderError("Minimum header size is " .
                                  30 . " bytes") ;
    }

    if (! _isZipMagic($signature))
    {
        return $self->HeaderError("Bad Magic") ;
    }

    my $header = $self->_readZipHeader($signature);

    defined $header
        or delete *$self->{Transparent} ;

    return $header ;
}
544
# Parses a local file header whose 4-byte signature ($magic) has already
# been read, and prepares the object for reading that member.
#
# Steps:
#   * read the 26 remaining fixed bytes and decode the fields;
#   * reject encrypted and patched members;
#   * read the filename (decoding it as UTF-8 when the 'efs' option and the
#     language-encoding flag are both set) and the extra field;
#   * pick up 64-bit sizes from a Zip64 extra field when present;
#   * record sizes/CRC in ZipData and create the uncompression adapter for
#     the member's compression method in Uncomp.
#
# Returns a hash reference describing the member, or the result of
# HeaderError / TruncatedHeader / saveErrorString on failure. Croaks when a
# filename flagged as UTF-8 cannot be decoded.
sub _readZipHeader($)
{
    my ($self, $magic) = @_ ;
    my ($buffer) = '' ;

    $self->smartReadExact(\$buffer, 30 - 4)
        or return $self->HeaderError("Minimum header size is " .
                                     30 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

   #my $extractVersion     = unpack ("v", substr($buffer, 4-4,  2));
    my $gpFlag             = unpack ("v", substr($buffer, 6-4,  2));
    my $compressedMethod   = unpack ("v", substr($buffer, 8-4,  2));
    my $lastModTime        = unpack ("V", substr($buffer, 10-4, 4));
    my $crc32              = unpack ("V", substr($buffer, 14-4, 4));
    my $compressedLength   = U64::newUnpack_V32(substr($buffer, 18-4, 4));
    my $uncompressedLength = U64::newUnpack_V32(substr($buffer, 22-4, 4));
    my $filename_length    = unpack ("v", substr($buffer, 26-4, 2));
    my $extra_length       = unpack ("v", substr($buffer, 28-4, 2));

    my $filename;
    my $extraField;
    my @EXTRA = ();

    # Some programs (some versions of LibreOffice) mark entries as streamed, but still fill out
    # compressedLength/uncompressedLength & crc32 in the local file header.
    # The expected data descriptor is not populated.
    # So only assume streaming if the Streaming bit is set AND the header
    # looks unpopulated.
    # NOTE(review): the test below inspects the CRC32 field, not the
    # compressed length - confirm which was intended.
    my $streamingMode = (($gpFlag & ZIP_GP_FLAG_STREAMING_MASK)  && $crc32 == 0) ? 1 : 0 ;

    my $efs_flag = ($gpFlag & ZIP_GP_FLAG_LANGUAGE_ENCODING) ? 1 : 0;

    return $self->HeaderError("Encrypted content not supported")
        if $gpFlag & (ZIP_GP_FLAG_ENCRYPTED_MASK|ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK);

    return $self->HeaderError("Patch content not supported")
        if $gpFlag & ZIP_GP_FLAG_PATCHED_MASK;

    *$self->{ZipData}{Streaming} = $streamingMode;


    if ($filename_length)
    {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedHeader("Filename");

        if (*$self->{UnzipData}{efs} && $efs_flag && $] >= 5.008004)
        {
            require Encode;
            require Carp;

            # The eval block ends with an explicit true value so that a
            # successfully decoded name which is itself false (such as "0")
            # is not mistaken for a decode failure.
            eval { $filename = Encode::decode_utf8($filename, 1); 1 }
                or Carp::croak("Zip Filename not UTF-8") ;
        }

        $keep .= $filename ;
    }

    my $zip64 = 0 ;

    if ($extra_length)
    {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedHeader("Extra Field");

        my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField,
                                                \@EXTRA, 1, 0);
        return $self->HeaderError($bad)
            if defined $bad;

        $keep .= $extraField ;

        # Index the parsed sub-fields by ID, keeping a reference to the data.
        my %Extra ;
        for (@EXTRA)
        {
            $Extra{$_->[0]} = \$_->[1];
        }

        if (defined $Extra{ZIP_EXTRA_ID_ZIP64()})
        {
            $zip64 = 1 ;

            my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} };

            # This code assumes that all the fields in the Zip64
            # extra field aren't necessarily present. The spec says that
            # they only exist if the equivalent local headers are -1.

            if (! $streamingMode) {
                my $offset = 0 ;

                if (U64::full32($uncompressedLength->get32bit()) ) {
                    $uncompressedLength
                            = U64::newUnpack_V64(substr($buff, 0, 8));

                    $offset += 8 ;
                }

                if (U64::full32($compressedLength->get32bit()) ) {

                    $compressedLength
                        = U64::newUnpack_V64(substr($buff, $offset, 8));

                    $offset += 8 ;
                }
           }
        }
    }

    *$self->{ZipData}{Zip64} = $zip64;

    if (! $streamingMode) {
        *$self->{ZipData}{Streaming} = 0;
        *$self->{ZipData}{Crc32} = $crc32;
        *$self->{ZipData}{CompressedLen} = $compressedLength;
        *$self->{ZipData}{UnCompressedLen} = $uncompressedLength;
        *$self->{CompressedInputLengthRemaining} =
            *$self->{CompressedInputLength} = $compressedLength->get64bit();
    }

    # Running CRC of the uncompressed data starts from the initial value.
    *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(undef);
    *$self->{ZipData}{Method} = $compressedMethod;
    if ($compressedMethod == ZIP_CM_DEFLATE)
    {
        *$self->{Type} = 'zip-deflate';
        my $obj = IO::Uncompress::Adapter::Inflate::mkUncompObject(1,0,0);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_BZIP2)
    {
        # The optional adapters are only usable when their module loaded.
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::Bunzip2::VERSION ;

        *$self->{Type} = 'zip-bzip2';

        my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_XZ)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnXz::VERSION ;

        *$self->{Type} = 'zip-xz';

        my $obj = IO::Uncompress::Adapter::UnXz::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_ZSTD)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnZstd::VERSION ;

        *$self->{Type} = 'zip-zstd';

        my $obj = IO::Uncompress::Adapter::UnZstd::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_LZMA)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnLzma::VERSION ;

        *$self->{Type} = 'zip-lzma';

        # The member data starts with a 4-byte LZMA header (two version
        # bytes and a 16-bit properties size) followed by the properties.
        my $LzmaHeader;
        $self->smartReadExact(\$LzmaHeader, 4)
                or return $self->saveErrorString(undef, "Truncated file");
       #my ($verHi, $verLo)   = unpack ("CC", substr($LzmaHeader, 0, 2));
        my $LzmaPropertiesSize   = unpack ("v", substr($LzmaHeader, 2, 2));


        my $LzmaPropertyData;
        $self->smartReadExact(\$LzmaPropertyData, $LzmaPropertiesSize)
                or return $self->saveErrorString(undef, "Truncated file");

        if (! $streamingMode) {
            # Those header bytes count towards the member's compressed size.
            *$self->{ZipData}{CompressedLen}->subtract(4 + $LzmaPropertiesSize) ;
            *$self->{CompressedInputLengthRemaining} =
                *$self->{CompressedInputLength} = *$self->{ZipData}{CompressedLen}->get64bit();
        }

        my $obj =
            IO::Uncompress::Adapter::UnLzma::mkUncompZipObject($LzmaPropertyData);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_STORE)
    {
        *$self->{Type} = 'zip-stored';

        my $obj =
        IO::Uncompress::Adapter::Identity::mkUncompObject($streamingMode,
                                                          $zip64);

        *$self->{Uncomp} = $obj;
    }
    else
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod");
    }

    return {
        'Type'               => 'zip',
        'FingerprintLength'  => 4,
        #'HeaderLength'       => $compressedMethod == 8 ? length $keep : 0,
        'HeaderLength'       => length $keep,
        'Zip64'              => $zip64,
        # A data descriptor follows the data only in streaming mode.
        'TrailerLength'      => ! $streamingMode ? 0 : $zip64 ? 24 : 16,
        'Header'             => $keep,
        'CompressedLength'   => $compressedLength ,
        'UncompressedLength' => $uncompressedLength ,
        'CRC32'              => $crc32 ,
        'Name'               => $filename,
        'efs'                => $efs_flag, # language encoding flag
        'Time'               => _dosToUnixTime($lastModTime),
        'Stream'             => $streamingMode,

        'MethodID'           => $compressedMethod,
        'MethodName'         => $MethodNames{$compressedMethod} || 'Unknown',

        'ExtraFieldRaw' => $extraField,
        'ExtraField'    => [ @EXTRA ],


      }
}
788
# Updates the running CRC32 kept in ZipData after more data has been
# uncompressed.
#
# For deflated members the CRC is taken from the uncompression object;
# for every other method it is computed here from the buffer referenced by
# the first argument, seeded with the current CRC, with the second argument
# passed straight through to Compress::Raw::Zlib::crc32.
sub filterUncompressed
{
    my $self = shift ;

    my $crc = (*$self->{ZipData}{Method} == ZIP_CM_DEFLATE)
        ? *$self->{Uncomp}->crc32()
        : Compress::Raw::Zlib::crc32(${$_[0]}, *$self->{ZipData}{CRC32}, $_[1]);

    return *$self->{ZipData}{CRC32} = $crc ;
}
800
801
802# from Archive::Zip & info-zip
# Converts a packed 32-bit DOS date/time value to a Unix epoch time,
# interpreting the fields as local time.
#
# Bit layout of $dt (most significant first):
#   7 bits  year since 1980
#   4 bits  month (1-12)
#   5 bits  day of month
#   5 bits  hour
#   6 bits  minute
#   5 bits  seconds divided by 2
#
# Returns the epoch time, or 0 when the fields do not form a valid date
# (for example an all-zero timestamp, whose month and day are both 0).
sub _dosToUnixTime
{
    my $dt = shift;

    # Pass a four-digit year: Time::Local applies a rolling-century guess
    # to two-digit years (0..99), which would eventually misplace 1980-1999.
    my $year = ( ( $dt >> 25 ) & 0x7f ) + 1980;
    my $mon  = ( ( $dt >> 21 ) & 0x0f ) - 1;
    my $mday = ( ( $dt >> 16 ) & 0x1f );

    my $hour = ( ( $dt >> 11 ) & 0x1f );
    my $min  = ( ( $dt >> 5 ) & 0x3f );
    my $sec  = ( ( $dt << 1 ) & 0x3e );

    use Time::Local ;

    # timelocal croaks on out-of-range fields rather than returning undef,
    # so trap that here (without firing any __DIE__ hook or clobbering $@).
    local $@;
    my $time_t = eval {
        local $SIG{__DIE__};
        Time::Local::timelocal( $sec, $min, $hour, $mday, $mon, $year );
    };

    return 0 if ! defined $time_t;
    return $time_t;
}
821
822#sub scanCentralDirectory
823#{
824#    # Use cases
825#    # 1 32-bit CD
826#    # 2 64-bit CD
827#
828#    my $self = shift ;
829#
830#    my @CD = ();
831#    my $offset = $self->findCentralDirectoryOffset();
832#
833#    return 0
834#        if ! defined $offset;
835#
836#    $self->smarkSeek($offset, 0, SEEK_SET) ;
837#
838#    # Now walk the Central Directory Records
839#    my $buffer ;
840#    while ($self->smartReadExact(\$buffer, 46) &&
841#           unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) {
842#
843#        my $compressedLength   = unpack ("V", substr($buffer, 20, 4));
844#        my $filename_length    = unpack ("v", substr($buffer, 28, 2));
845#        my $extra_length       = unpack ("v", substr($buffer, 30, 2));
846#        my $comment_length     = unpack ("v", substr($buffer, 32, 2));
847#
848#        $self->smarkSeek($filename_length + $extra_length + $comment_length, 0, SEEK_CUR)
849#            if $extra_length || $comment_length || $filename_length;
850#        push @CD, $compressedLength ;
851#    }
852#
853#}
854#
855#sub findCentralDirectoryOffset
856#{
857#    my $self = shift ;
858#
859#    # Most common use-case is where there is no comment, so
860#    # know exactly where the end of central directory record
861#    # should be.
862#
863#    $self->smarkSeek(-22, 0, SEEK_END) ;
864#
865#    my $buffer;
866#    $self->smartReadExact(\$buffer, 22) ;
867#
868#    my $zip64 = 0;
869#    my $centralDirOffset ;
870#    if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) {
871#        $centralDirOffset = unpack ("V", substr($buffer, 16, 2));
872#    }
873#    else {
874#        die "xxxx";
875#    }
876#
877#    return $centralDirOffset ;
878#}
879#
880#sub is84BitCD
881#{
882#    # TODO
883#    my $self = shift ;
884#}
885
886
# Moves the read position forward by $size bytes, relative to the current
# position.
#
# $size may be a plain number or a U64 object (its 64-bit value is used).
# Returns the result of smartSeek.
sub skip
{
    my $self = shift;
    my $size = shift;

    use Fcntl qw(SEEK_CUR);

    my $distance = ref($size) eq 'U64' ? $size->get64bit() : $size ;

    # Every other smartSeek call in this file passes (offset, 0, whence);
    # the whence value used to be passed in the middle slot here.
    return $self->smartSeek($distance, 0, SEEK_CUR);
}
901
902
# Walks the central directory and collects the compressed size of every
# member.
#
# The current read position is remembered and restored before returning.
# For each central directory record the 32-bit compressed size is used
# unless it is the 0xFFFFFFFF marker, in which case the size is looked up
# in the record's Zip64 extra field (falling back to the marker value when
# no such field is present).
#
# Returns a list of U64 objects, one per record, or an empty list when the
# central directory cannot be located.
sub scanCentralDirectory
{
    my $self = shift;

    my $here = $self->tell();

    # Use cases
    # 1 32-bit CD
    # 2 64-bit CD

    my @CD = ();
    my $offset = $self->findCentralDirectoryOffset();

    if (! defined $offset)
    {
        # The search above moved the read position; put it back.
        $self->smartSeek($here, 0, SEEK_SET) ;
        return () ;
    }

    # (was a call to the non-existent method "smarkSeek")
    $self->smartSeek($offset, 0, SEEK_SET) ;

    # Now walk the Central Directory Records
    my $buffer ;
    while ($self->smartReadExact(\$buffer, 46) &&
           unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) {

        my $compressedLength   = unpack("V", substr($buffer, 20, 4));
        my $uncompressedLength = unpack("V", substr($buffer, 24, 4));
        my $filename_length    = unpack("v", substr($buffer, 28, 2));
        my $extra_length       = unpack("v", substr($buffer, 30, 2));
        my $comment_length     = unpack("v", substr($buffer, 32, 2));

        $self->skip($filename_length ) ;

        my $v64 = U64->new( $compressedLength );

        if (U64::full32($compressedLength) ) {
            # Real size lives in the Zip64 extra field, so read it.
            $self->smartReadExact(\$buffer, $extra_length) ;
            die "xxx $offset $comment_length $filename_length $extra_length" . length($buffer)
                if length($buffer) != $extra_length;
            my $got = $self->get64Extra($buffer, U64::full32($uncompressedLength));

            # If not Zip64 extra field, assume size is 0xFFFFFFFF
            $v64 = $got if defined $got;
        }
        else {
            $self->skip($extra_length) ;
        }

        $self->skip($comment_length ) ;

        push @CD, $v64 ;
    }

    $self->smartSeek($here, 0, SEEK_SET) ;

    return @CD;
}
958
# Extracts a 64-bit size from the Zip64 sub-field (ID 0x0001) of a raw
# extra field.
#
# $buffer is the raw extra field. When $is_uncomp is true the value is
# taken 8 bytes into the sub-field, otherwise from its start.
#
# Returns a U64 object, or undef when the sub-field is not present.
sub get64Extra
{
    my ($self, $buffer, $is_uncomp) = @_ ;

    my $zip64_data = IO::Compress::Zlib::Extra::findID(0x0001, $buffer);

    return undef
        if ! defined $zip64_data ;

    my $start = $is_uncomp ? 8 : 0 ;
    my $value = U64::newUnpack_V64(substr($zip64_data, $start)) ;

    return $value;
}
978
# Finds the central directory offset through the Zip64 structures.
#
# $here is the position of the end of central directory record. The 20
# bytes immediately before it must be a Zip64 end of central directory
# locator; the 64-bit offset at byte 8 of the locator leads to the Zip64
# end of central directory record, whose body holds the central directory
# offset at byte 36.
#
# Returns that offset. Dies (with terse placeholder messages) when a seek
# or read fails or an expected signature is missing.
sub offsetFromZip64
{
    my ($self, $here) = @_ ;

    $self->smartSeek($here - 20, 0, SEEK_SET)
        or die "xx $!" ;

    my $record;
    my $got = 0;
    $self->smartReadExact(\$record, 20)
        or die "xxx $here $got $!" ;

    die "zzz"
        if unpack("V", $record) != ZIP64_END_CENTRAL_LOC_HDR_SIG ;

    my $recordOffset = U64::Value_VV64(substr($record,  8, 8));

    $self->smartSeek($recordOffset, 0, SEEK_SET) ;

    $self->smartReadExact(\$record, 4)
        or die "xxx" ;

    die "zzz"
        if unpack("V", $record) != ZIP64_END_CENTRAL_REC_HDR_SIG ;

    # 64-bit size of the rest of the record, then the rest itself.
    $self->smartReadExact(\$record, 8)
        or die "xxx" ;
    my $size  = U64::Value_VV64($record);
    $self->smartReadExact(\$record, $size)
        or die "xxx" ;

    my $cd64 =  U64::Value_VV64(substr($record,  36, 8));

    return $cd64 ;
}
1018
# The end of central directory signature in its on-disk (little-endian)
# byte form, for searching raw buffers.
use constant Pack_ZIP_END_CENTRAL_HDR_SIG => pack("V", ZIP_END_CENTRAL_HDR_SIG);

# Locates the start of the central directory.
#
# First tries the common case of an archive with no trailing comment, where
# the end of central directory record is exactly the last 22 bytes. If
# that fails, the tail of the input is searched backwards, growing the
# window by 1024 bytes at a time, for the record's signature.
#
# When the 32-bit offset found is the 0xFFFFFFFF marker the real offset is
# fetched with offsetFromZip64.
#
# Returns the offset, or undef when no end of central directory record can
# be found. Dies (with terse placeholder messages) when a seek or read
# fails.
sub findCentralDirectoryOffset
{
    my $self = shift ;

    # Most common use-case is where there is no comment, so
    # know exactly where the end of central directory record
    # should be.

    $self->smartSeek(-22, 0, SEEK_END) ;
    my $here = $self->tell();

    my $buffer;
    $self->smartReadExact(\$buffer, 22)
        or die "xxx" ;

    my $centralDirOffset ;
    if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) {
        $centralDirOffset = unpack("V", substr($buffer, 16,  4));
    }
    else {
        $self->smartSeek(0, 0, SEEK_END) ;

        my $fileLen = $self->tell();
        my $want = 0 ;

        while(1) {
            $want += 1024;
            my $seekTo = $fileLen - $want;
            if ($seekTo < 0 ) {
                # Window now covers the whole input.
                $seekTo = 0;
                $want = $fileLen ;
            }
            $self->smartSeek( $seekTo, 0, SEEK_SET)
                or die "xxx $!" ;

            # smartReadExact takes a reference to the buffer it fills (the
            # buffer itself used to be passed here).
            $self->smartReadExact(\$buffer, $want)
                or die "xxx " ;
            my $pos = rindex( $buffer, Pack_ZIP_END_CENTRAL_HDR_SIG);

            if ($pos >= 0) {
                #$here = $self->tell();
                $here = $seekTo + $pos ;
                $centralDirOffset = unpack("V", substr($buffer, $pos + 16,  4));
                last ;
            }

            return undef
                if $want == $fileLen;
        }
    }

    $centralDirOffset = $self->offsetFromZip64($here)
        if U64::full32($centralDirOffset) ;

    return $centralDirOffset ;
}
1078
10791;
1080
1081__END__
1082
1083
1084=head1 NAME
1085
1086IO::Uncompress::Unzip - Read zip files/buffers
1087
1088=head1 SYNOPSIS
1089
1090    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1091
1092    my $status = unzip $input => $output [,OPTS]
1093        or die "unzip failed: $UnzipError\n";
1094
1095    my $z = IO::Uncompress::Unzip->new( $input [OPTS] )
1096        or die "unzip failed: $UnzipError\n";
1097
1098    $status = $z->read($buffer)
1099    $status = $z->read($buffer, $length)
1100    $status = $z->read($buffer, $length, $offset)
1101    $line = $z->getline()
1102    $char = $z->getc()
1103    $char = $z->ungetc()
1104    $char = $z->opened()
1105
1106    $status = $z->inflateSync()
1107
1108    $data = $z->trailingData()
1109    $status = $z->nextStream()
1110    $data = $z->getHeaderInfo()
1111    $z->tell()
1112    $z->seek($position, $whence)
1113    $z->binmode()
1114    $z->fileno()
1115    $z->eof()
1116    $z->close()
1117
1118    $UnzipError ;
1119
1120    # IO::File mode
1121
1122    <$z>
1123    read($z, $buffer);
1124    read($z, $buffer, $length);
1125    read($z, $buffer, $length, $offset);
1126    tell($z)
1127    seek($z, $position, $whence)
1128    binmode($z)
1129    fileno($z)
1130    eof($z)
1131    close($z)
1132
1133=head1 DESCRIPTION
1134
1135This module provides a Perl interface that allows the reading of
zip files/buffers.
1137
1138For writing zip files/buffers, see the companion module IO::Compress::Zip.
1139
1140The primary purpose of this module is to provide I<streaming> read access to
1141zip files and buffers.
1142
1143At present the following compression methods are supported by IO::Uncompress::Unzip
1144
1145=over 5
1146
1147=item Store (0)
1148
1149=item Deflate (8)
1150
1151=item Bzip2 (12)
1152
1153To read Bzip2 content, the module C<IO::Uncompress::Bunzip2> must
1154be installed.
1155
1156=item Lzma (14)
1157
1158To read LZMA content, the module C<IO::Uncompress::UnLzma> must
1159be installed.
1160
1161=item Xz (95)
1162
1163To read Xz content, the module C<IO::Uncompress::UnXz> must
1164be installed.
1165
1166=item Zstandard (93)
1167
1168To read Zstandard content, the module C<IO::Uncompress::UnZstd> must
1169be installed.
1170
1171=back
1172
1173=head1 Functional Interface
1174
1175A top-level function, C<unzip>, is provided to carry out
1176"one-shot" uncompression between buffers and/or files. For finer
1177control over the uncompression process, see the L</"OO Interface">
1178section.
1179
1180    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1181
1182    unzip $input_filename_or_reference => $output_filename_or_reference [,OPTS]
1183        or die "unzip failed: $UnzipError\n";
1184
1185The functional interface needs Perl5.005 or better.
1186
1187=head2 unzip $input_filename_or_reference => $output_filename_or_reference [, OPTS]
1188
1189C<unzip> expects at least two parameters,
1190C<$input_filename_or_reference> and C<$output_filename_or_reference>
1191and zero or more optional parameters (see L</Optional Parameters>)
1192
1193=head3 The C<$input_filename_or_reference> parameter
1194
1195The parameter, C<$input_filename_or_reference>, is used to define the
1196source of the compressed data.
1197
1198It can take one of the following forms:
1199
1200=over 5
1201
1202=item A filename
1203
1204If the C<$input_filename_or_reference> parameter is a simple scalar, it is
1205assumed to be a filename. This file will be opened for reading and the
1206input data will be read from it.
1207
1208=item A filehandle
1209
1210If the C<$input_filename_or_reference> parameter is a filehandle, the input
1211data will be read from it.  The string '-' can be used as an alias for
1212standard input.
1213
1214=item A scalar reference
1215
1216If C<$input_filename_or_reference> is a scalar reference, the input data
1217will be read from C<$$input_filename_or_reference>.
1218
1219=item An array reference
1220
1221If C<$input_filename_or_reference> is an array reference, each element in
1222the array must be a filename.
1223
1224The input data will be read from each file in turn.
1225
1226The complete array will be walked to ensure that it only
1227contains valid filenames before any data is uncompressed.
1228
1229=item An Input FileGlob string
1230
1231If C<$input_filename_or_reference> is a string that is delimited by the
1232characters "<" and ">" C<unzip> will assume that it is an
1233I<input fileglob string>. The input is the list of files that match the
1234fileglob.
1235
1236See L<File::GlobMapper|File::GlobMapper> for more details.
1237
1238=back
1239
1240If the C<$input_filename_or_reference> parameter is any other type,
1241C<undef> will be returned.
1242
1243=head3 The C<$output_filename_or_reference> parameter
1244
1245The parameter C<$output_filename_or_reference> is used to control the
1246destination of the uncompressed data. This parameter can take one of
1247these forms.
1248
1249=over 5
1250
1251=item A filename
1252
1253If the C<$output_filename_or_reference> parameter is a simple scalar, it is
1254assumed to be a filename.  This file will be opened for writing and the
1255uncompressed data will be written to it.
1256
1257=item A filehandle
1258
1259If the C<$output_filename_or_reference> parameter is a filehandle, the
1260uncompressed data will be written to it.  The string '-' can be used as
1261an alias for standard output.
1262
1263=item A scalar reference
1264
1265If C<$output_filename_or_reference> is a scalar reference, the
1266uncompressed data will be stored in C<$$output_filename_or_reference>.
1267
1268=item An Array Reference
1269
1270If C<$output_filename_or_reference> is an array reference,
1271the uncompressed data will be pushed onto the array.
1272
1273=item An Output FileGlob
1274
1275If C<$output_filename_or_reference> is a string that is delimited by the
1276characters "<" and ">" C<unzip> will assume that it is an
1277I<output fileglob string>. The output is the list of files that match the
1278fileglob.
1279
When C<$output_filename_or_reference> is a fileglob string,
1281C<$input_filename_or_reference> must also be a fileglob string. Anything
1282else is an error.
1283
1284See L<File::GlobMapper|File::GlobMapper> for more details.
1285
1286=back
1287
1288If the C<$output_filename_or_reference> parameter is any other type,
1289C<undef> will be returned.
1290
1291=head2 Notes
1292
1293When C<$input_filename_or_reference> maps to multiple compressed
1294files/buffers and C<$output_filename_or_reference> is
1295a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a
1296concatenation of all the uncompressed data from each of the input
1297files/buffers.
1298
1299=head2 Optional Parameters
1300
1301The optional parameters for the one-shot function C<unzip>
1302are (for the most part) identical to those used with the OO interface defined in the
1303L</"Constructor Options"> section. The exceptions are listed below
1304
1305=over 5
1306
1307=item C<< AutoClose => 0|1 >>
1308
1309This option applies to any input or output data streams to
1310C<unzip> that are filehandles.
1311
1312If C<AutoClose> is specified, and the value is true, it will result in all
1313input and/or output filehandles being closed once C<unzip> has
1314completed.
1315
1316This parameter defaults to 0.
1317
1318=item C<< BinModeOut => 0|1 >>
1319
1320This option is now a no-op. All files will be written  in binmode.
1321
1322=item C<< Append => 0|1 >>
1323
1324The behaviour of this option is dependent on the type of output data
1325stream.
1326
1327=over 5
1328
1329=item * A Buffer
1330
If C<Append> is enabled, all uncompressed data will be appended to the end of
1332the output buffer. Otherwise the output buffer will be cleared before any
1333uncompressed data is written to it.
1334
1335=item * A Filename
1336
1337If C<Append> is enabled, the file will be opened in append mode. Otherwise
1338the contents of the file, if any, will be truncated before any uncompressed
1339data is written to it.
1340
1341=item * A Filehandle
1342
1343If C<Append> is enabled, the filehandle will be positioned to the end of
1344the file via a call to C<seek> before any uncompressed data is
1345written to it.  Otherwise the file pointer will not be moved.
1346
1347=back
1348
1349When C<Append> is specified, and set to true, it will I<append> all uncompressed
1350data to the output data stream.
1351
1352So when the output is a filehandle it will carry out a seek to the eof
1353before writing any uncompressed data. If the output is a filename, it will be opened for
1354appending. If the output is a buffer, all uncompressed data will be
1355appended to the existing buffer.
1356
1357Conversely when C<Append> is not specified, or it is present and is set to
1358false, it will operate as follows.
1359
1360When the output is a filename, it will truncate the contents of the file
1361before writing any uncompressed data. If the output is a filehandle
1362its position will not be changed. If the output is a buffer, it will be
1363wiped before any uncompressed data is output.
1364
1365Defaults to 0.
1366
1367=item C<< MultiStream => 0|1 >>
1368
1369If the input file/buffer contains multiple compressed data streams, this
1370option will uncompress the whole lot as a single data stream.
1371
1372Defaults to 0.
1373
1374=item C<< TrailingData => $scalar >>
1375
1376Returns the data, if any, that is present immediately after the compressed
1377data stream once uncompression is complete.
1378
1379This option can be used when there is useful information immediately
1380following the compressed data stream, and you don't know the length of the
1381compressed data stream.
1382
1383If the input is a buffer, C<trailingData> will return everything from the
1384end of the compressed data stream to the end of the buffer.
1385
1386If the input is a filehandle, C<trailingData> will return the data that is
1387left in the filehandle input buffer once the end of the compressed data
1388stream has been reached. You can then use the filehandle to read the rest
1389of the input file.
1390
1391Don't bother using C<trailingData> if the input is a filename.
1392
1393If you know the length of the compressed data stream before you start
1394uncompressing, you can avoid having to use C<trailingData> by setting the
1395C<InputLength> option.
1396
1397=back
1398
1399=head2 Examples
1400
1401Say you have a zip file, C<file1.zip>, that only contains a
1402single member, you can read it and write the uncompressed data to the
1403file C<file1.txt> like this.
1404
1405    use strict ;
1406    use warnings ;
1407    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1408
1409    my $input = "file1.zip";
1410    my $output = "file1.txt";
1411    unzip $input => $output
1412        or die "unzip failed: $UnzipError\n";
1413
1414If you have a zip file that contains multiple members and want to read a
1415specific member from the file, say C<"data1">, use the C<Name> option
1416
1417    use strict ;
1418    use warnings ;
1419    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1420
1421    my $input = "file1.zip";
1422    my $output = "file1.txt";
1423    unzip $input => $output, Name => "data1"
1424        or die "unzip failed: $UnzipError\n";
1425
1426Alternatively, if you want to read the  C<"data1"> member into memory, use
1427a scalar reference for the C<output> parameter.
1428
1429    use strict ;
1430    use warnings ;
1431    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1432
1433    my $input = "file1.zip";
1434    my $output ;
1435    unzip $input => \$output, Name => "data1"
1436        or die "unzip failed: $UnzipError\n";
1437    # $output now contains the uncompressed data
1438
1439To read from an existing Perl filehandle, C<$input>, and write the
1440uncompressed data to a buffer, C<$buffer>.
1441
1442    use strict ;
1443    use warnings ;
1444    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1445    use IO::File ;
1446
1447    my $input = IO::File->new( "<file1.zip" )
1448        or die "Cannot open 'file1.zip': $!\n" ;
1449    my $buffer ;
1450    unzip $input => \$buffer
1451        or die "unzip failed: $UnzipError\n";
1452
1453=head1 OO Interface
1454
1455=head2 Constructor
1456
1457The format of the constructor for IO::Uncompress::Unzip is shown below
1458
1459    my $z = IO::Uncompress::Unzip->new( $input [OPTS] )
1460        or die "IO::Uncompress::Unzip failed: $UnzipError\n";
1461
1462Returns an C<IO::Uncompress::Unzip> object on success and undef on failure.
1463The variable C<$UnzipError> will contain an error message on failure.
1464
1465If you are running Perl 5.005 or better the object, C<$z>, returned from
1466IO::Uncompress::Unzip can be used exactly like an L<IO::File|IO::File> filehandle.
1467This means that all normal input file operations can be carried out with
1468C<$z>.  For example, to read a line from a compressed file/buffer you can
1469use either of these forms
1470
1471    $line = $z->getline();
1472    $line = <$z>;
1473
1474The mandatory parameter C<$input> is used to determine the source of the
1475compressed data. This parameter can take one of three forms.
1476
1477=over 5
1478
1479=item A filename
1480
1481If the C<$input> parameter is a scalar, it is assumed to be a filename. This
1482file will be opened for reading and the compressed data will be read from it.
1483
1484=item A filehandle
1485
1486If the C<$input> parameter is a filehandle, the compressed data will be
1487read from it.
1488The string '-' can be used as an alias for standard input.
1489
1490=item A scalar reference
1491
1492If C<$input> is a scalar reference, the compressed data will be read from
1493C<$$input>.
1494
1495=back
1496
1497=head2 Constructor Options
1498
1499The option names defined below are case insensitive and can be optionally
1500prefixed by a '-'.  So all of the following are valid
1501
1502    -AutoClose
1503    -autoclose
1504    AUTOCLOSE
1505    autoclose
1506
1507OPTS is a combination of the following options:
1508
1509=over 5
1510
1511=item C<< Name => "membername" >>
1512
1513Open "membername" from the zip file for reading.
1514
1515=item C<< Efs => 0| 1 >>
1516
1517When this option is set to true AND the zip archive being read has
1518the "Language Encoding Flag" (EFS) set, the member name is assumed to be encoded in UTF-8.
1519
If the member name in the zip archive is not valid UTF-8 when this option is true,
1521the script will die with an error message.
1522
1523Note that this option only works with Perl 5.8.4 or better.
1524
1525This option defaults to B<false>.
1526
1527=item C<< AutoClose => 0|1 >>
1528
1529This option is only valid when the C<$input> parameter is a filehandle. If
1530specified, and the value is true, it will result in the file being closed once
1531either the C<close> method is called or the IO::Uncompress::Unzip object is
1532destroyed.
1533
1534This parameter defaults to 0.
1535
1536=item C<< MultiStream => 0|1 >>
1537
1538Treats the complete zip file/buffer as a single compressed data
1539stream. When reading in multi-stream mode each member of the zip
1540file/buffer will be uncompressed in turn until the end of the file/buffer
1541is encountered.
1542
1543This parameter defaults to 0.
1544
1545=item C<< Prime => $string >>
1546
1547This option will uncompress the contents of C<$string> before processing the
1548input file/buffer.
1549
1550This option can be useful when the compressed data is embedded in another
1551file/data structure and it is not possible to work out where the compressed
1552data begins without having to read the first few bytes. If this is the
1553case, the uncompression can be I<primed> with these bytes using this
1554option.
1555
1556=item C<< Transparent => 0|1 >>
1557
1558If this option is set and the input file/buffer is not compressed data,
1559the module will allow reading of it anyway.
1560
1561In addition, if the input file/buffer does contain compressed data and
1562there is non-compressed data immediately following it, setting this option
1563will make this module treat the whole file/buffer as a single data stream.
1564
1565This option defaults to 1.
1566
1567=item C<< BlockSize => $num >>
1568
1569When reading the compressed input data, IO::Uncompress::Unzip will read it in
1570blocks of C<$num> bytes.
1571
1572This option defaults to 4096.
1573
1574=item C<< InputLength => $size >>
1575
1576When present this option will limit the number of compressed bytes read
1577from the input file/buffer to C<$size>. This option can be used in the
1578situation where there is useful data directly after the compressed data
1579stream and you know beforehand the exact length of the compressed data
1580stream.
1581
1582This option is mostly used when reading from a filehandle, in which case
1583the file pointer will be left pointing to the first byte directly after the
1584compressed data stream.
1585
1586This option defaults to off.
1587
1588=item C<< Append => 0|1 >>
1589
1590This option controls what the C<read> method does with uncompressed data.
1591
1592If set to 1, all uncompressed data will be appended to the output parameter
1593of the C<read> method.
1594
1595If set to 0, the contents of the output parameter of the C<read> method
1596will be overwritten by the uncompressed data.
1597
1598Defaults to 0.
1599
1600=item C<< Strict => 0|1 >>
1601
1602This option controls whether the extra checks defined below are used when
1603carrying out the decompression. When Strict is on, the extra tests are
1604carried out, when Strict is off they are not.
1605
1606The default for this option is off.
1607
1608=back
1609
1610=head2 Examples
1611
1612TODO
1613
1614=head1 Methods
1615
1616=head2 read
1617
1618Usage is
1619
1620    $status = $z->read($buffer)
1621
1622Reads a block of compressed data (the size of the compressed block is
determined by the C<BlockSize> option in the constructor), uncompresses it and
1624writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
1625set in the constructor, the uncompressed data will be appended to the
1626C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
1627
1628Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1629or a negative number on error.
1630
1631=head2 read
1632
1633Usage is
1634
1635    $status = $z->read($buffer, $length)
1636    $status = $z->read($buffer, $length, $offset)
1637
1638    $status = read($z, $buffer, $length)
1639    $status = read($z, $buffer, $length, $offset)
1640
1641Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
1642
1643The main difference between this form of the C<read> method and the
1644previous one, is that this one will attempt to return I<exactly> C<$length>
bytes. The only circumstances in which it will not do so are when end-of-file
or an IO error is encountered.
1647
1648Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1649or a negative number on error.
1650
1651=head2 getline
1652
1653Usage is
1654
1655    $line = $z->getline()
1656    $line = <$z>
1657
1658Reads a single line.
1659
1660This method fully supports the use of the variable C<$/> (or
1661C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
1662determine what constitutes an end of line. Paragraph mode, record mode and
1663file slurp mode are all supported.
1664
1665=head2 getc
1666
1667Usage is
1668
1669    $char = $z->getc()
1670
1671Read a single character.
1672
1673=head2 ungetc
1674
1675Usage is
1676
1677    $char = $z->ungetc($string)
1678
1679=head2 inflateSync
1680
1681Usage is
1682
1683    $status = $z->inflateSync()
1684
1685TODO
1686
1687=head2 getHeaderInfo
1688
1689Usage is
1690
1691    $hdr  = $z->getHeaderInfo();
1692    @hdrs = $z->getHeaderInfo();
1693
1694This method returns either a hash reference (in scalar context) or a list
of hash references (in array context) that contains information about each
1696of the header fields in the compressed data stream(s).
1697
1698=head2 tell
1699
1700Usage is
1701
1702    $z->tell()
1703    tell $z
1704
1705Returns the uncompressed file offset.
1706
1707=head2 eof
1708
1709Usage is
1710
1711    $z->eof();
1712    eof($z);
1713
1714Returns true if the end of the compressed input stream has been reached.
1715
1716=head2 seek
1717
1718    $z->seek($position, $whence);
1719    seek($z, $position, $whence);
1720
1721Provides a sub-set of the C<seek> functionality, with the restriction
1722that it is only legal to seek forward in the input file/buffer.
1723It is a fatal error to attempt to seek backward.
1724
1725Note that the implementation of C<seek> in this module does not provide
1726true random access to a compressed file/buffer. It  works by uncompressing
1727data from the current offset in the file/buffer until it reaches the
1728uncompressed offset specified in the parameters to C<seek>. For very small
1729files this may be acceptable behaviour. For large files it may cause an
1730unacceptable delay.
1731
The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
1733SEEK_CUR or SEEK_END.
1734
1735Returns 1 on success, 0 on failure.
1736
1737=head2 binmode
1738
1739Usage is
1740
1741    $z->binmode
1742    binmode $z ;
1743
1744This is a noop provided for completeness.
1745
1746=head2 opened
1747
1748    $z->opened()
1749
Returns true if the object currently refers to an opened file/buffer.
1751
1752=head2 autoflush
1753
1754    my $prev = $z->autoflush()
1755    my $prev = $z->autoflush(EXPR)
1756
1757If the C<$z> object is associated with a file or a filehandle, this method
1758returns the current autoflush setting for the underlying filehandle. If
1759C<EXPR> is present, and is non-zero, it will enable flushing after every
1760write/print operation.
1761
1762If C<$z> is associated with a buffer, this method has no effect and always
1763returns C<undef>.
1764
1765B<Note> that the special variable C<$|> B<cannot> be used to set or
1766retrieve the autoflush setting.
1767
1768=head2 input_line_number
1769
1770    $z->input_line_number()
1771    $z->input_line_number(EXPR)
1772
1773Returns the current uncompressed line number. If C<EXPR> is present it has
1774the effect of setting the line number. Note that setting the line number
1775does not change the current position within the file/buffer being read.
1776
1777The contents of C<$/> are used to determine what constitutes a line
1778terminator.
1779
1780=head2 fileno
1781
1782    $z->fileno()
1783    fileno($z)
1784
1785If the C<$z> object is associated with a file or a filehandle, C<fileno>
1786will return the underlying file descriptor. Once the C<close> method is
1787called C<fileno> will return C<undef>.
1788
1789If the C<$z> object is associated with a buffer, this method will return
1790C<undef>.
1791
1792=head2 close
1793
1794    $z->close() ;
1795    close $z ;
1796
Closes the input file/buffer.
1798
1799For most versions of Perl this method will be automatically invoked if
1800the IO::Uncompress::Unzip object is destroyed (either explicitly or by the
1801variable with the reference to the object going out of scope). The
1802exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
1803these cases, the C<close> method will be called automatically, but
1804not until global destruction of all live objects when the program is
1805terminating.
1806
1807Therefore, if you want your scripts to be able to run on all versions
1808of Perl, you should call C<close> explicitly and not rely on automatic
1809closing.
1810
1811Returns true on success, otherwise 0.
1812
1813If the C<AutoClose> option has been enabled when the IO::Uncompress::Unzip
1814object was created, and the object is associated with a file, the
1815underlying file will also be closed.
1816
1817=head2 nextStream
1818
1819Usage is
1820
1821    my $status = $z->nextStream();
1822
1823Skips to the next compressed data stream in the input file/buffer. If a new
1824compressed data stream is found, the eof marker will be cleared and C<$.>
1825will be reset to 0.
1826
1827If trailing data is present immediately after the zip archive and the
1828C<Transparent> option is enabled, this method will consider that trailing
1829data to be another member of the zip archive.
1830
1831Returns 1 if a new stream was found, 0 if none was found, and -1 if an
1832error was encountered.
1833
1834=head2 trailingData
1835
1836Usage is
1837
1838    my $data = $z->trailingData();
1839
1840Returns the data, if any, that is present immediately after the compressed
1841data stream once uncompression is complete. It only makes sense to call
1842this method once the end of the compressed data stream has been
1843encountered.
1844
1845This option can be used when there is useful information immediately
1846following the compressed data stream, and you don't know the length of the
1847compressed data stream.
1848
1849If the input is a buffer, C<trailingData> will return everything from the
1850end of the compressed data stream to the end of the buffer.
1851
1852If the input is a filehandle, C<trailingData> will return the data that is
1853left in the filehandle input buffer once the end of the compressed data
1854stream has been reached. You can then use the filehandle to read the rest
1855of the input file.
1856
1857Don't bother using C<trailingData> if the input is a filename.
1858
1859If you know the length of the compressed data stream before you start
1860uncompressing, you can avoid having to use C<trailingData> by setting the
1861C<InputLength> option in the constructor.
1862
1863=head1 Importing
1864
1865No symbolic constants are required by IO::Uncompress::Unzip at present.
1866
1867=over 5
1868
1869=item :all
1870
1871Imports C<unzip> and C<$UnzipError>.
1872Same as doing this
1873
1874    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1875
1876=back
1877
1878=head1 EXAMPLES
1879
1880=head2 Working with Net::FTP
1881
1882See L<IO::Compress::FAQ|IO::Compress::FAQ/"Compressed files and Net::FTP">
1883
1884=head2 Walking through a zip file
1885
1886The code below can be used to traverse a zip file, one compressed data
1887stream at a time.
1888
1889    use IO::Uncompress::Unzip qw($UnzipError);
1890
1891    my $zipfile = "somefile.zip";
1892    my $u = IO::Uncompress::Unzip->new( $zipfile )
1893        or die "Cannot open $zipfile: $UnzipError";
1894
1895    my $status;
1896    for ($status = 1; $status > 0; $status = $u->nextStream())
1897    {
1898
1899        my $name = $u->getHeaderInfo()->{Name};
1900        warn "Processing member $name\n" ;
1901
1902        my $buff;
1903        while (($status = $u->read($buff)) > 0) {
1904            # Do something here
1905        }
1906
1907        last if $status < 0;
1908    }
1909
1910    die "Error processing $zipfile: $!\n"
1911        if $status < 0 ;
1912
1913Each individual compressed data stream is read until the logical
1914end-of-file is reached. Then C<nextStream> is called. This will skip to the
1915start of the next compressed data stream and clear the end-of-file flag.
1916
1917It is also worth noting that C<nextStream> can be called at any time -- you
1918don't have to wait until you have exhausted a compressed data stream before
1919skipping to the next one.
1920
1921=head2 Unzipping a complete zip file to disk
1922
Daniel S. Sterling has written a script that uses C<IO::Uncompress::Unzip>
1924to read a zip file and unzip its contents to disk.
1925
1926The script is available from L<https://gist.github.com/eqhmcow/5389877>
1927
1928=head1 SUPPORT
1929
1930General feedback/questions/bug reports should be sent to
1931L<https://github.com/pmqs/IO-Compress/issues> (preferred) or
1932L<https://rt.cpan.org/Public/Dist/Display.html?Name=IO-Compress>.
1933
1934=head1 SEE ALSO
1935
1936L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzma>, L<IO::Uncompress::UnLzma>, L<IO::Compress::Xz>, L<IO::Uncompress::UnXz>, L<IO::Compress::Lzip>, L<IO::Uncompress::UnLzip>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Compress::Zstd>, L<IO::Uncompress::UnZstd>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
1937
1938L<IO::Compress::FAQ|IO::Compress::FAQ>
1939
1940L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1941L<Archive::Tar|Archive::Tar>,
1942L<IO::Zlib|IO::Zlib>
1943
1944For RFC 1950, 1951 and 1952 see
1945L<https://datatracker.ietf.org/doc/html/rfc1950>,
1946L<https://datatracker.ietf.org/doc/html/rfc1951> and
1947L<https://datatracker.ietf.org/doc/html/rfc1952>
1948
1949The I<zlib> compression library was written by Jean-loup Gailly
1950C<gzip@prep.ai.mit.edu> and Mark Adler C<madler@alumni.caltech.edu>.
1951
1952The primary site for the I<zlib> compression library is
1953L<http://www.zlib.org>.
1954
1955The primary site for the I<zlib-ng> compression library is
1956L<https://github.com/zlib-ng/zlib-ng>.
1957
1958The primary site for gzip is L<http://www.gzip.org>.
1959
1960=head1 AUTHOR
1961
1962This module was written by Paul Marquess, C<pmqs@cpan.org>.
1963
1964=head1 MODIFICATION HISTORY
1965
1966See the Changes file.
1967
1968=head1 COPYRIGHT AND LICENSE
1969
1970Copyright (c) 2005-2023 Paul Marquess. All rights reserved.
1971
1972This program is free software; you can redistribute it and/or
1973modify it under the same terms as Perl itself.
1974