package IO::Uncompress::Unzip;

require 5.006 ;

use strict ;
use warnings;
use bytes;

use IO::File;
use IO::Uncompress::RawInflate  2.204 ;
use IO::Compress::Base::Common  2.204 qw(:Status );
use IO::Uncompress::Adapter::Inflate  2.204 ;
use IO::Uncompress::Adapter::Identity 2.204 ;
use IO::Compress::Zlib::Extra 2.204 ;
use IO::Compress::Zip::Constants 2.204 ;

use Compress::Raw::Zlib  2.204 () ;

use Fcntl qw(SEEK_CUR);
use Time::Local ;

BEGIN
{
    # Don't trigger any __DIE__ Hooks.
    local $SIG{__DIE__};

    # The optional decompression back-ends are loaded on a best-effort
    # basis; _readZipHeader checks their $VERSION before using them.
    eval{ require IO::Uncompress::Adapter::Bunzip2 ;
          IO::Uncompress::Adapter::Bunzip2->import() } ;
    eval{ require IO::Uncompress::Adapter::UnLzma ;
          IO::Uncompress::Adapter::UnLzma->import() } ;
    eval{ require IO::Uncompress::Adapter::UnXz ;
          IO::Uncompress::Adapter::UnXz->import() } ;
    eval{ require IO::Uncompress::Adapter::UnZstd ;
          IO::Uncompress::Adapter::UnZstd->import() } ;
}


require Exporter ;

our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup);

$VERSION = '2.204';
$UnzipError = '';

@ISA    = qw(IO::Uncompress::RawInflate Exporter);
@EXPORT_OK = qw($UnzipError unzip );
%EXPORT_TAGS = %IO::Uncompress::RawInflate::EXPORT_TAGS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');

# Dispatch table used by chkTrailer: record signature => routine that
# reads past that record.
%headerLookup = (
        ZIP_CENTRAL_HDR_SIG,            \&skipCentralDirectory,
        ZIP_END_CENTRAL_HDR_SIG,        \&skipEndCentralDirectory,
        ZIP64_END_CENTRAL_REC_HDR_SIG,  \&skipCentralDirectory64Rec,
        ZIP64_END_CENTRAL_LOC_HDR_SIG,  \&skipCentralDirectory64Loc,
        ZIP64_ARCHIVE_EXTRA_SIG,        \&skipArchiveExtra,
        ZIP64_DIGITAL_SIGNATURE_SIG,    \&skipDigitalSignature,
        );

# Compression method id => name reported as 'MethodName' in the header info.
my %MethodNames = (
        ZIP_CM_DEFLATE()    => 'Deflated',
        ZIP_CM_BZIP2()      => 'Bzip2',
        ZIP_CM_LZMA()       => 'Lzma',
        ZIP_CM_STORE()      => 'Stored',
        ZIP_CM_XZ()         => 'Xz',
        ZIP_CM_ZSTD()       => 'Zstd',
    );

# Constructor: builds the self-tied object and hands the remaining
# arguments to _create.
sub new
{
    my $class = shift ;
    my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$UnzipError);
    $obj->_create(undef, 0, @_);
}

# One-shot functional interface; all arguments are passed to _inf.
sub unzip
{
    my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$UnzipError);
    return $obj->_inf(@_) ;
}

# Extra constructor options recognised by this class.
sub getExtraParams
{

    return (
            # Zip header fields
            'name'    => [IO::Compress::Base::Common::Parse_any,       undef],

            'stream'  => [IO::Compress::Base::Common::Parse_boolean,   0],
            'efs'     => [IO::Compress::Base::Common::Parse_boolean,   0],

            # TODO - This means reading the central directory to get
            # 1. the local header offsets
            # 2. The compressed data length
        );
}

# Records the 'name' and 'efs' options on the object and forces crc32 on.
sub ckParams
{
    my $self = shift ;
    my $got = shift ;

    # unzip always needs crc32
    $got->setValue('crc32' => 1);

    *$self->{UnzipData}{Name} = $got->getValue('name');
    *$self->{UnzipData}{efs} = $got->getValue('efs');

    return 1;
}

# Checks the magic number, then reads the local header (skipping members
# until the requested name is found) and stores it in {Info}.
sub mkUncomp
{
    my $self = shift ;
    my $got = shift ;

    my $magic = $self->ckMagic()
        or return 0;

    *$self->{Info} = $self->readHeader($magic)
        or return undef ;

    return 1;

}

# Reads the 4-byte signature and returns it if it is a zip local file
# header signature; otherwise reports a header error.
sub ckMagic
{
    my $self = shift;

    my $magic ;
    $self->smartReadExact(\$magic, 4);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Minimum header size is " .
                              4 . " bytes")
        if length $magic != 4 ;

    return $self->HeaderError("Bad Magic")
        if ! _isZipMagic($magic) ;

    *$self->{Type} = 'zip';

    return $magic ;
}


# Reads and discards $offset bytes of input in chunks of at most 16k.
# Returns 1 on success, 0 if the input ran out first.
sub fastForward
{
    my $self = shift;
    my $offset = shift;

    # TODO - if Stream isn't enabled & reading from file, use seek

    my $buffer = '';
    my $c = 1024 * 16;

    while ($offset > 0)
    {
        # Shrink the final chunk to whatever is left.  (This used to
        # compare against "length $offset", i.e. the number of digits in
        # the offset, which made every read only a few bytes long.)
        $c = $offset
            if $offset < $c ;

        $offset -= $c;

        $self->smartReadExact(\$buffer, $c)
            or return 0;
    }

    return 1;
}


# Parses the local header that follows $magic.  When a 'name' option was
# given, members are skipped until one with that name is found.
# Returns the header hash, or undef on error / name not found.
sub readHeader
{
    my $self = shift;
    my $magic = shift ;

    my $name =  *$self->{UnzipData}{Name} ;
    my $hdr = $self->_readZipHeader($magic) ;

    while (defined $hdr)
    {
        if (! defined $name || $hdr->{Name} eq $name)
        {
            return $hdr ;
        }

        # skip the data
        # TODO - when Stream is off, use seek
        my $buffer;
        if (*$self->{ZipData}{Streaming}) {
            # The compressed length is not known up front, so the data has
            # to be run through the uncompressor to find where it ends.
            while (1) {

                my $chunk;
                my $status = $self->smartRead(\$chunk, 1024 * 16);

                return $self->saveErrorString(undef, "Truncated file")
                    if $status <= 0 ;

                my $temp_buf ;
                my $out;

                $status = *$self->{Uncomp}->uncompr(\$chunk, \$temp_buf, 0, $out);

                return $self->saveErrorString(undef, *$self->{Uncomp}{Error},
                                                     *$self->{Uncomp}{ErrorNo})
                    if $self->saveStatus($status) == STATUS_ERROR;

                # whatever is still in $chunk goes back to the input
                $self->pushBack($chunk)  ;

                if ($status == STATUS_ENDSTREAM) {
                    *$self->{Uncomp}->reset();
                    last;
                }
            }

            # skip the trailer
            $self->smartReadExact(\$buffer, $hdr->{TrailerLength})
                or return $self->saveErrorString(undef, "Truncated file");
        }
        else {
            my $c = $hdr->{CompressedLength}->get64bit();
            $self->fastForward($c)
                or return $self->saveErrorString(undef, "Truncated file");
            $buffer = '';
        }

        $self->chkTrailer($buffer) == STATUS_OK
            or return $self->saveErrorString(undef, "Truncated file");

        $hdr = $self->_readFullZipHeader();

        return $self->saveErrorString(undef, "Cannot find '$name'")
            if $self->smartEof();
    }

    return undef;
}

# Validates the data descriptor (streamed members) or the sizes/CRC taken
# from the local header (non-streamed members), then reads past any
# central-directory style records that follow.
# Returns STATUS_OK, STATUS_EOF or STATUS_ERROR.
sub chkTrailer
{
    my $self = shift;
    my $trailer = shift;

    my ($sig, $CRC32, $cSize, $uSize) ;
    if (*$self->{ZipData}{Streaming}) {
        $sig   = unpack ("V", substr($trailer, 0, 4));
        $CRC32 = unpack ("V", substr($trailer, 4, 4));

        if (*$self->{ZipData}{Zip64} ) {
            $cSize = U64::newUnpack_V64(substr($trailer,  8, 8));
            $uSize = U64::newUnpack_V64(substr($trailer, 16, 8));
        }
        else {
            $cSize = U64::newUnpack_V32(substr($trailer,  8, 4));
            $uSize = U64::newUnpack_V32(substr($trailer, 12, 4));
        }

        return $self->TrailerError("Data Descriptor signature, got $sig")
            if $sig != ZIP_DATA_HDR_SIG;
    }
    else {
        ($CRC32, $cSize, $uSize) =
            (*$self->{ZipData}{Crc32},
             *$self->{ZipData}{CompressedLen},
             *$self->{ZipData}{UnCompressedLen});
    }

    *$self->{Info}{CRC32} = *$self->{ZipData}{CRC32} ;
    *$self->{Info}{CompressedLength} = $cSize->get64bit();
    *$self->{Info}{UncompressedLength} = $uSize->get64bit();

    if (*$self->{Strict}) {
        return $self->TrailerError("CRC mismatch")
            if $CRC32  != *$self->{ZipData}{CRC32} ;

        return $self->TrailerError("CSIZE mismatch.")
            if ! $cSize->equal(*$self->{CompSize});

        return $self->TrailerError("USIZE mismatch.")
            if ! $uSize->equal(*$self->{UnCompSize});
    }

    # check for central directory or end of central directory
    while (1)
    {
        my $magic ;
        my $got = $self->smartRead(\$magic, 4);

        return $self->saveErrorString(STATUS_ERROR, "Truncated file")
            if $got != 4 && *$self->{Strict};

        if ($got == 0) {
            return STATUS_EOF ;
        }
        elsif ($got < 0) {
            return STATUS_ERROR ;
        }
        elsif ($got < 4) {
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }

        my $sig = unpack("V", $magic) ;

        if (my $handler = $headerLookup{$sig})
        {
            if ($handler->($self, $magic) != STATUS_OK ) {
                if (*$self->{Strict}) {
                    return STATUS_ERROR ;
                }
                else {
                    $self->clearError();
                    return STATUS_OK ;
                }
            }

            # the end-of-central-directory record is the last one
            return STATUS_OK
                if $sig == ZIP_END_CENTRAL_HDR_SIG;
        }
        elsif ($sig == ZIP_LOCAL_HDR_SIG)
        {
            # another member follows; leave its signature for the next read
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }
        else
        {
            # put the data back
            $self->pushBack($magic)  ;
            last;
        }
    }

    # an unrecognised signature followed the member
    return STATUS_ERROR ;
}

# Reads past one central directory file header (46 fixed bytes plus the
# variable-length filename, extra field and comment).
sub skipCentralDirectory
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 46 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     46 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

   #my $versionMadeBy      = unpack ("v", substr($buffer, 4-4,  2));
   #my $extractVersion     = unpack ("v", substr($buffer, 6-4,  2));
   #my $gpFlag             = unpack ("v", substr($buffer, 8-4,  2));
   #my $compressedMethod   = unpack ("v", substr($buffer, 10-4, 2));
   #my $lastModTime        = unpack ("V", substr($buffer, 12-4, 4));
   #my $crc32              = unpack ("V", substr($buffer, 16-4, 4));
   #my $compressedLength   = unpack ("V", substr($buffer, 20-4, 4));
   #my $uncompressedLength = unpack ("V", substr($buffer, 24-4, 4));
    my $filename_length    = unpack ("v", substr($buffer, 28-4, 2));
    my $extra_length       = unpack ("v", substr($buffer, 30-4, 2));
    my $comment_length     = unpack ("v", substr($buffer, 32-4, 2));
   #my $disk_start         = unpack ("v", substr($buffer, 34-4, 2));
   #my $int_file_attrib    = unpack ("v", substr($buffer, 36-4, 2));
   #my $ext_file_attrib    = unpack ("V", substr($buffer, 38-4, 2));
   #my $lcl_hdr_offset     = unpack ("V", substr($buffer, 42-4, 2));


    my $filename;
    my $extraField;
    my $comment ;
    if ($filename_length)
    {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedTrailer("filename");
        $keep .= $filename ;
    }

    if ($extra_length)
    {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedTrailer("extra");
        $keep .= $extraField ;
    }

    if ($comment_length)
    {
        $self->smartReadExact(\$comment, $comment_length)
            or return $self->TruncatedTrailer("comment");
        $keep .= $comment ;
    }

    return STATUS_OK ;
}

# Reads past an archive extra data record: a 4-byte length followed by
# that many bytes.
sub skipArchiveExtra
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 4)
        or return $self->TrailerError("Minimum header size is " .
                                     4 . " bytes") ;

    my $keep = $magic . $buffer ;

    my $size = unpack ("V", $buffer);

    $self->smartReadExact(\$buffer, $size)
        or return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;

    $keep .= $buffer ;
    *$self->{HeaderPending} = $keep ;

    return STATUS_OK ;
}


# Reads past a Zip64 end of central directory record: an 8-byte
# little-endian size followed by that many bytes.
sub skipCentralDirectory64Rec
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 8)
        or return $self->TrailerError("Minimum header size is " .
                                     8 . " bytes") ;

    # Combine the two 32-bit halves exactly as offsetFromZip64 does.  (The
    # high word used to be multiplied by U64::MAX32, i.e. 2**32 - 1, which
    # is off by $sizeHi whenever the high word is non-zero.)
    my $size = U64::Value_VV64($buffer);

    $self->fastForward($size)
        or return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;

   #my $versionMadeBy      = unpack ("v",   substr($buffer,  0, 2));
   #my $extractVersion     = unpack ("v",   substr($buffer,  2, 2));
   #my $diskNumber         = unpack ("V",   substr($buffer,  4, 4));
   #my $cntrlDirDiskNo     = unpack ("V",   substr($buffer,  8, 4));
   #my $entriesInThisCD    = unpack ("V V", substr($buffer, 12, 8));
   #my $entriesInCD        = unpack ("V V", substr($buffer, 20, 8));
   #my $sizeOfCD           = unpack ("V V", substr($buffer, 28, 8));
   #my $offsetToCD         = unpack ("V V", substr($buffer, 36, 8));

    return STATUS_OK ;
}

# Reads past a Zip64 end of central directory locator (20 bytes in total).
sub skipCentralDirectory64Loc
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 20 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     20 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

   #my $startCdDisk        = unpack ("V",   substr($buffer,  4-4, 4));
   #my $offsetToCD         = unpack ("V V", substr($buffer,  8-4, 8));
   #my $diskCount          = unpack ("V",   substr($buffer, 16-4, 4));

    return STATUS_OK ;
}

# Reads past the end of central directory record (22 fixed bytes plus an
# optional archive comment).
sub skipEndCentralDirectory
{
    my $self = shift;
    my $magic = shift ;


    my $buffer;
    $self->smartReadExact(\$buffer, 22 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     22 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

   #my $diskNumber         = unpack ("v", substr($buffer, 4-4,  2));
   #my $cntrlDirDiskNo     = unpack ("v", substr($buffer, 6-4,  2));
   #my $entriesInThisCD    = unpack ("v", substr($buffer, 8-4,  2));
   #my $entriesInCD        = unpack ("v", substr($buffer, 10-4, 2));
   #my $sizeOfCD           = unpack ("V", substr($buffer, 12-4, 4));
   #my $offsetToCD         = unpack ("V", substr($buffer, 16-4, 4));
    my $comment_length     = unpack ("v", substr($buffer, 20-4, 2));


    my $comment ;
    if ($comment_length)
    {
        $self->smartReadExact(\$comment, $comment_length)
            or return $self->TruncatedTrailer("comment");
        $keep .= $comment ;
    }

    return STATUS_OK ;
}


# True if $buffer starts with the local file header signature.
sub _isZipMagic
{
    my $buffer = shift ;
    return 0 if length $buffer < 4 ;
    my $sig = unpack("V", $buffer) ;
    return $sig == ZIP_LOCAL_HDR_SIG ;
}


# Reads the signature of the next member and then its local header.
# Returns the header hash, or the result of HeaderError on failure.
sub _readFullZipHeader
{
    my ($self) = @_ ;
    my $magic = '' ;

    $self->smartReadExact(\$magic, 4);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Minimum header size is " .
                              30 . " bytes")
        if length $magic != 4 ;


    return $self->HeaderError("Bad Magic")
        if ! _isZipMagic($magic) ;

    my $status = $self->_readZipHeader($magic);
    delete *$self->{Transparent} if ! defined $status ;
    return $status ;
}

# Parses the local file header that follows the signature $magic, creates
# the matching uncompression object in {Uncomp} and returns a hash
# reference describing the member.
sub _readZipHeader
{
    my ($self, $magic) = @_ ;
    my ($buffer) = '' ;

    $self->smartReadExact(\$buffer, 30 - 4)
        or return $self->HeaderError("Minimum header size is " .
                                     30 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    my $extractVersion     = unpack ("v", substr($buffer, 4-4,  2));
    my $gpFlag             = unpack ("v", substr($buffer, 6-4,  2));
    my $compressedMethod   = unpack ("v", substr($buffer, 8-4,  2));
    my $lastModTime        = unpack ("V", substr($buffer, 10-4, 4));
    my $crc32              = unpack ("V", substr($buffer, 14-4, 4));
    my $compressedLength   = U64::newUnpack_V32(substr($buffer, 18-4, 4));
    my $uncompressedLength = U64::newUnpack_V32(substr($buffer, 22-4, 4));
    my $filename_length    = unpack ("v", substr($buffer, 26-4, 2));
    my $extra_length       = unpack ("v", substr($buffer, 28-4, 2));

    my $filename;
    my $extraField;
    my @EXTRA = ();

    # Some programs (some versions of LibreOffice) mark entries as streamed, but still fill out
    # compressedLength/uncompressedLength & crc32 in the local file header.
    # The expected data descriptor is not populated.
    # So only assume streaming if the Streaming bit is set AND the CRC32
    # field in the local header is zero.
    my $streamingMode = (($gpFlag & ZIP_GP_FLAG_STREAMING_MASK)  && $crc32 == 0) ? 1 : 0 ;

    my $efs_flag = ($gpFlag & ZIP_GP_FLAG_LANGUAGE_ENCODING) ? 1 : 0;

    return $self->HeaderError("Encrypted content not supported")
        if $gpFlag & (ZIP_GP_FLAG_ENCRYPTED_MASK|ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK);

    return $self->HeaderError("Patch content not supported")
        if $gpFlag & ZIP_GP_FLAG_PATCHED_MASK;

    *$self->{ZipData}{Streaming} = $streamingMode;


    if ($filename_length)
    {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedHeader("Filename");

        if (*$self->{UnzipData}{efs} && $efs_flag && $] >= 5.008004)
        {
            require Encode;
            require Carp;
            # The trailing "1" makes the eval true on success even when the
            # decoded name is itself false (for example a member called "0").
            eval { $filename = Encode::decode_utf8($filename, 1); 1 }
                or Carp::croak("Zip Filename not UTF-8") ;
        }

        $keep .= $filename ;
    }

    my $zip64 = 0 ;

    if ($extra_length)
    {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedHeader("Extra Field");

        my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField,
                                                           \@EXTRA, 1, 0);
        return $self->HeaderError($bad)
            if defined $bad;

        $keep .= $extraField ;

        my %Extra ;
        for (@EXTRA)
        {
            $Extra{$_->[0]} = \$_->[1];
        }

        if (defined $Extra{ZIP_EXTRA_ID_ZIP64()})
        {
            $zip64 = 1 ;

            my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} };

            # This code assumes that all the fields in the Zip64
            # extra field aren't necessarily present. The spec says that
            # they only exist if the equivalent local headers are -1.

            if (! $streamingMode) {
                my $offset = 0 ;

                if (U64::full32($uncompressedLength->get32bit()) ) {
                    $uncompressedLength
                            = U64::newUnpack_V64(substr($buff, 0, 8));

                    $offset += 8 ;
                }

                if (U64::full32($compressedLength->get32bit()) ) {

                    $compressedLength
                        = U64::newUnpack_V64(substr($buff, $offset, 8));

                    $offset += 8 ;
                }
           }
        }
    }

    *$self->{ZipData}{Zip64} = $zip64;

    if (! $streamingMode) {
        *$self->{ZipData}{Streaming} = 0;
        *$self->{ZipData}{Crc32} = $crc32;
        *$self->{ZipData}{CompressedLen} = $compressedLength;
        *$self->{ZipData}{UnCompressedLen} = $uncompressedLength;
        *$self->{CompressedInputLengthRemaining} =
            *$self->{CompressedInputLength} = $compressedLength->get64bit();
    }

    *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(undef);
    *$self->{ZipData}{Method} = $compressedMethod;
    if ($compressedMethod == ZIP_CM_DEFLATE)
    {
        *$self->{Type} = 'zip-deflate';
        my $obj = IO::Uncompress::Adapter::Inflate::mkUncompObject(1,0,0);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_BZIP2)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::Bunzip2::VERSION ;

        *$self->{Type} = 'zip-bzip2';

        my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_XZ)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnXz::VERSION ;

        *$self->{Type} = 'zip-xz';

        my $obj = IO::Uncompress::Adapter::UnXz::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_ZSTD)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnZstd::VERSION ;

        *$self->{Type} = 'zip-zstd';

        my $obj = IO::Uncompress::Adapter::UnZstd::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_LZMA)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnLzma::VERSION ;

        *$self->{Type} = 'zip-lzma';

        # The LZMA data is preceded by a 4-byte header (two version bytes
        # and a 16-bit properties size) and then the properties themselves.
        my $LzmaHeader;
        $self->smartReadExact(\$LzmaHeader, 4)
                or return $self->saveErrorString(undef, "Truncated file");
        my ($verHi, $verLo)   = unpack ("CC", substr($LzmaHeader, 0, 2));
        my $LzmaPropertiesSize   = unpack ("v", substr($LzmaHeader, 2, 2));


        my $LzmaPropertyData;
        $self->smartReadExact(\$LzmaPropertyData, $LzmaPropertiesSize)
                or return $self->saveErrorString(undef, "Truncated file");

        if (! $streamingMode) {
            # those header bytes count towards the compressed length
            *$self->{ZipData}{CompressedLen}->subtract(4 + $LzmaPropertiesSize) ;
            *$self->{CompressedInputLengthRemaining} =
                *$self->{CompressedInputLength} = *$self->{ZipData}{CompressedLen}->get64bit();
        }

        my $obj =
            IO::Uncompress::Adapter::UnLzma::mkUncompZipObject($LzmaPropertyData);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_STORE)
    {
        *$self->{Type} = 'zip-stored';

        my $obj =
        IO::Uncompress::Adapter::Identity::mkUncompObject($streamingMode,
                                                          $zip64);

        *$self->{Uncomp} = $obj;
    }
    else
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod");
    }

    return {
        'Type'               => 'zip',
        'FingerprintLength'  => 4,
        'HeaderLength'       => length $keep,
        'Zip64'              => $zip64,
        'TrailerLength'      => ! $streamingMode ? 0 : $zip64 ? 24 : 16,
        'Header'             => $keep,
        'CompressedLength'   => $compressedLength ,
        'UncompressedLength' => $uncompressedLength ,
        'CRC32'              => $crc32 ,
        'Name'               => $filename,
        'efs'                => $efs_flag, # language encoding flag
        'Time'               => _dosToUnixTime($lastModTime),
        'Stream'             => $streamingMode,

        'MethodID'           => $compressedMethod,
        'MethodName'         => $MethodNames{$compressedMethod} || 'Unknown',

        'ExtraFieldRaw'      => $extraField,
        'ExtraField'         => [ @EXTRA ],
      }
}

# Updates the running CRC32.  For deflate the value is taken from the
# uncompressor; otherwise it is computed over the buffer referenced by
# the first argument.
sub filterUncompressed
{
    my $self = shift ;

    if (*$self->{ZipData}{Method} == ZIP_CM_DEFLATE) {
        *$self->{ZipData}{CRC32} = *$self->{Uncomp}->crc32() ;
    }
    else {
        *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(${$_[0]}, *$self->{ZipData}{CRC32}, $_[1]);
    }
}


# from Archive::Zip & info-zip
#
# Converts a packed 32-bit DOS date/time into a Unix time_t (local time).
# Returns 0 when the fields do not form a valid date.
sub _dosToUnixTime
{
    my $dt = shift;

    # Pass the full four-digit year: a two-digit value would be subject to
    # Time::Local's rolling-century interpretation.
    my $year = ( ( $dt >> 25 ) & 0x7f ) + 1980;
    my $mon  = ( ( $dt >> 21 ) & 0x0f ) - 1;
    my $mday = ( ( $dt >> 16 ) & 0x1f );

    my $hour = ( ( $dt >> 11 ) & 0x1f );
    my $min  = ( ( $dt >> 5  ) & 0x3f );
    my $sec  = ( ( $dt << 1  ) & 0x3e );

    # timelocal croaks on out-of-range fields (an all-zero DOS timestamp has
    # month 0 and day 0), so trap that and fall back to 0.
    my $time_t = eval {
        local $SIG{__DIE__};
        Time::Local::timelocal( $sec, $min, $hour, $mday, $mon, $year);
    };
    return 0 if ! defined $time_t;
    return $time_t;

}


# Seeks forward $size bytes from the current position.  $size is either a
# plain number or a U64 object.
sub skip
{
    my $self = shift;
    my $size = shift;

    $size = $size->get64bit()
        if ref $size eq 'U64';

    # Same three-argument form as every other smartSeek call in this file:
    # (offset, 0, whence).
    $self->smartSeek($size, 0, SEEK_CUR);
}


# Walks the central directory and returns a list with one U64 compressed
# length per entry.  Returns an empty list if the central directory cannot
# be located.  The read position is restored before returning.
sub scanCentralDirectory
{
    my $self = shift;

    my $here = $self->tell();

    # Use cases
    # 1 32-bit CD
    # 2 64-bit CD

    my @CD = ();
    my $offset = $self->findCentralDirectoryOffset();

    return ()
        if ! defined $offset;

    $self->smartSeek($offset, 0, SEEK_SET) ;

    # Now walk the Central Directory Records
    my $buffer ;
    while ($self->smartReadExact(\$buffer, 46) &&
           unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) {

        my $compressedLength   = unpack("V", substr($buffer, 20, 4));
        my $uncompressedLength = unpack("V", substr($buffer, 24, 4));
        my $filename_length    = unpack("v", substr($buffer, 28, 2));
        my $extra_length       = unpack("v", substr($buffer, 30, 2));
        my $comment_length     = unpack("v", substr($buffer, 32, 2));

        $self->skip($filename_length ) ;

        my $v64 = U64->new( $compressedLength );

        if (U64::full32($compressedLength) ) {
            # the real size lives in the Zip64 extra field
            $self->smartReadExact(\$buffer, $extra_length) ;
            die "xxx $offset $comment_length $filename_length $extra_length" . length($buffer)
                if length($buffer) != $extra_length;
            my $got = $self->get64Extra($buffer, U64::full32($uncompressedLength));

            # If not Zip64 extra field, assume size is 0xFFFFFFFF
            $v64 = $got if defined $got;
        }
        else {
            $self->skip($extra_length) ;
        }

        $self->skip($comment_length ) ;

        push @CD, $v64 ;
    }

    $self->smartSeek($here, 0, SEEK_SET) ;

    return @CD;
}

# Extracts the compressed size from the Zip64 extra field (id 0x0001) in
# $buffer.  When $is_uncomp is true the first 8 bytes of the field are
# skipped.  Returns a U64 object, or undef if the field is not present.
sub get64Extra
{
    my $self = shift ;

    my $buffer = shift;
    my $is_uncomp = shift ;

    my $extra = IO::Compress::Zlib::Extra::findID(0x0001, $buffer);

    return undef
        if ! defined $extra;

    return U64::newUnpack_V64(substr($extra,  $is_uncomp ? 8 : 0)) ;
}

# Given the position $here of the end of central directory record, reads
# the Zip64 locator in the 20 bytes before it and then the Zip64 end of
# central directory record it points at.  Returns the central directory
# offset stored there; dies if either record is missing.
sub offsetFromZip64
{
    my $self = shift ;
    my $here = shift;

    $self->smartSeek($here - 20, 0, SEEK_SET)
        or die "xx $!" ;

    my $buffer;
    my $got = 0;
    $self->smartReadExact(\$buffer, 20)
        or die "xxx $here $got $!" ;

    if ( unpack("V", $buffer) == ZIP64_END_CENTRAL_LOC_HDR_SIG ) {
        my $cd64 = U64::Value_VV64(substr($buffer,  8, 8));

        $self->smartSeek($cd64, 0, SEEK_SET) ;

        $self->smartReadExact(\$buffer, 4)
            or die "xxx" ;

        if ( unpack("V", $buffer) == ZIP64_END_CENTRAL_REC_HDR_SIG ) {

            $self->smartReadExact(\$buffer, 8)
                or die "xxx" ;
            my $size  = U64::Value_VV64($buffer);
            $self->smartReadExact(\$buffer, $size)
                or die "xxx" ;

            # 36 bytes into the record body is the 8-byte value returned
            # to the caller as the central directory offset
            my $cdOffset =  U64::Value_VV64(substr($buffer,  36, 8));

            return $cdOffset ;
        }

        die "zzz";
    }

    die "zzz";
}

use constant Pack_ZIP_END_CENTRAL_HDR_SIG => pack("V", ZIP_END_CENTRAL_HDR_SIG);

# Locates the end of central directory record and returns the central
# directory offset stored in it (following the Zip64 records when the
# 32-bit field is saturated).  Returns undef if no record is found.
sub findCentralDirectoryOffset
{
    my $self = shift ;

    # Most common use-case is where there is no comment, so
    # know exactly where the end of central directory record
    # should be.

    $self->smartSeek(-22, 0, SEEK_END) ;
    my $here = $self->tell();

    my $buffer;
    $self->smartReadExact(\$buffer, 22)
        or die "xxx" ;

    my $centralDirOffset ;
    if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) {
        $centralDirOffset = unpack("V", substr($buffer, 16,  4));
    }
    else {
        # There is an archive comment: search backwards from the end of
        # the file, widening the window by 1k each time round.
        $self->smartSeek(0, 0, SEEK_END) ;

        my $fileLen = $self->tell();
        my $want = 0 ;

        while(1) {
            $want += 1024;
            my $seekTo = $fileLen - $want;
            if ($seekTo < 0 ) {
                $seekTo = 0;
                $want = $fileLen ;
            }
            $self->smartSeek( $seekTo, 0, SEEK_SET)
                or die "xxx $!" ;
            # smartReadExact takes a reference to the buffer, as at every
            # other call site in this file
            $self->smartReadExact(\$buffer, $want)
                or die "xxx " ;
            my $pos = rindex( $buffer, Pack_ZIP_END_CENTRAL_HDR_SIG);

            if ($pos >= 0) {
                $here = $seekTo + $pos ;
                $centralDirOffset = unpack("V", substr($buffer, $pos + 16,  4));
                last ;
            }

            return undef
                if $want == $fileLen;
        }
    }

    $centralDirOffset = $self->offsetFromZip64($here)
        if U64::full32($centralDirOffset) ;

    return $centralDirOffset ;
}

1;

__END__


=head1 NAME

IO::Uncompress::Unzip - Read zip files/buffers

=head1 SYNOPSIS

    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;

    my $status = unzip $input => $output [,OPTS]
        or die "unzip failed: $UnzipError\n";

    my $z = IO::Uncompress::Unzip->new( $input [OPTS] )
        or die "unzip failed: $UnzipError\n";

    $status = $z->read($buffer)
    $status = $z->read($buffer, $length)
    $status = $z->read($buffer, $length, $offset)
    $line = $z->getline()
    $char = $z->getc()
    $char = $z->ungetc()
    $char = $z->opened()

    $status = $z->inflateSync()

    $data = $z->trailingData()
    $status = $z->nextStream()
    $data = $z->getHeaderInfo()
    $z->tell()
    $z->seek($position, $whence)
    $z->binmode()
    $z->fileno()
    $z->eof()
    $z->close()

    $UnzipError ;

    # IO::File mode

    <$z>
    read($z, $buffer);
    read($z, $buffer, $length);
    read($z, $buffer, $length, $offset);
    tell($z)
    seek($z, $position, $whence)
    binmode($z)
    fileno($z)
    eof($z)
    close($z)

=head1 DESCRIPTION

This module provides a Perl interface that allows the reading of
zip files/buffers.

For writing zip files/buffers, see the companion module IO::Compress::Zip.

The primary purpose of this module is to provide I<streaming> read access to
zip files and buffers.

At present the following compression methods are supported by IO::Uncompress::Unzip:

=over 5

=item Store (0)

=item Deflate (8)

=item Bzip2 (12)

To read Bzip2 content, the module C<IO::Uncompress::Bunzip2> must
be installed.

=item Lzma (14)

To read LZMA content, the module C<IO::Uncompress::UnLzma> must
be installed.

=item Xz (95)

To read Xz content, the module C<IO::Uncompress::UnXz> must
be installed.

=item Zstandard (93)

To read Zstandard content, the module C<IO::Uncompress::UnZstd> must
be installed.

=back

=head1 Functional Interface

A top-level function, C<unzip>, is provided to carry out
"one-shot" uncompression between buffers and/or files. For finer
control over the uncompression process, see the L</"OO Interface">
section.

    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;

    unzip $input_filename_or_reference => $output_filename_or_reference [,OPTS]
        or die "unzip failed: $UnzipError\n";

The functional interface needs Perl 5.006 or better.

=head2 unzip $input_filename_or_reference => $output_filename_or_reference [, OPTS]

C<unzip> expects at least two parameters,
C<$input_filename_or_reference> and C<$output_filename_or_reference>,
and zero or more optional parameters (see L</Optional Parameters>).

=head3 The C<$input_filename_or_reference> parameter

The parameter, C<$input_filename_or_reference>, is used to define the
source of the compressed data.

It can take one of the following forms:

=over 5

=item A filename

If the C<$input_filename_or_reference> parameter is a simple scalar, it is
assumed to be a filename.
This file will be opened for reading and the
input data will be read from it.

=item A filehandle

If the C<$input_filename_or_reference> parameter is a filehandle, the input
data will be read from it. The string '-' can be used as an alias for
standard input.

=item A scalar reference

If C<$input_filename_or_reference> is a scalar reference, the input data
will be read from C<$$input_filename_or_reference>.

=item An array reference

If C<$input_filename_or_reference> is an array reference, each element in
the array must be a filename.

The input data will be read from each file in turn.

The complete array will be walked to ensure that it only
contains valid filenames before any data is uncompressed.

=item An Input FileGlob string

If C<$input_filename_or_reference> is a string that is delimited by the
characters "<" and ">" C<unzip> will assume that it is an
I<input fileglob string>. The input is the list of files that match the
fileglob.

See L<File::GlobMapper|File::GlobMapper> for more details.

=back

If the C<$input_filename_or_reference> parameter is any other type,
C<undef> will be returned.

=head3 The C<$output_filename_or_reference> parameter

The parameter C<$output_filename_or_reference> is used to control the
destination of the uncompressed data. This parameter can take one of
these forms.

=over 5

=item A filename

If the C<$output_filename_or_reference> parameter is a simple scalar, it is
assumed to be a filename. This file will be opened for writing and the
uncompressed data will be written to it.

=item A filehandle

If the C<$output_filename_or_reference> parameter is a filehandle, the
uncompressed data will be written to it. The string '-' can be used as
an alias for standard output.
1262 1263=item A scalar reference 1264 1265If C<$output_filename_or_reference> is a scalar reference, the 1266uncompressed data will be stored in C<$$output_filename_or_reference>. 1267 1268=item An Array Reference 1269 1270If C<$output_filename_or_reference> is an array reference, 1271the uncompressed data will be pushed onto the array. 1272 1273=item An Output FileGlob 1274 1275If C<$output_filename_or_reference> is a string that is delimited by the 1276characters "<" and ">" C<unzip> will assume that it is an 1277I<output fileglob string>. The output is the list of files that match the 1278fileglob. 1279 1280When C<$output_filename_or_reference> is a fileglob string, 1281C<$input_filename_or_reference> must also be a fileglob string. Anything 1282else is an error. 1283 1284See L<File::GlobMapper|File::GlobMapper> for more details. 1285 1286=back 1287 1288If the C<$output_filename_or_reference> parameter is any other type, 1289C<undef> will be returned. 1290 1291=head2 Notes 1292 1293When C<$input_filename_or_reference> maps to multiple compressed 1294files/buffers and C<$output_filename_or_reference> is 1295a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a 1296concatenation of all the uncompressed data from each of the input 1297files/buffers. 1298 1299=head2 Optional Parameters 1300 1301The optional parameters for the one-shot function C<unzip> 1302are (for the most part) identical to those used with the OO interface defined in the 1303L</"Constructor Options"> section. The exceptions are listed below 1304 1305=over 5 1306 1307=item C<< AutoClose => 0|1 >> 1308 1309This option applies to any input or output data streams to 1310C<unzip> that are filehandles. 1311 1312If C<AutoClose> is specified, and the value is true, it will result in all 1313input and/or output filehandles being closed once C<unzip> has 1314completed. 1315 1316This parameter defaults to 0.
1317 1318=item C<< BinModeOut => 0|1 >> 1319 1320This option is now a no-op. All files will be written in binmode. 1321 1322=item C<< Append => 0|1 >> 1323 1324The behaviour of this option is dependent on the type of output data 1325stream. 1326 1327=over 5 1328 1329=item * A Buffer 1330 1331If C<Append> is enabled, all uncompressed data will be appended to the end of 1332the output buffer. Otherwise the output buffer will be cleared before any 1333uncompressed data is written to it. 1334 1335=item * A Filename 1336 1337If C<Append> is enabled, the file will be opened in append mode. Otherwise 1338the contents of the file, if any, will be truncated before any uncompressed 1339data is written to it. 1340 1341=item * A Filehandle 1342 1343If C<Append> is enabled, the filehandle will be positioned to the end of 1344the file via a call to C<seek> before any uncompressed data is 1345written to it. Otherwise the file pointer will not be moved. 1346 1347=back 1348 1349When C<Append> is specified, and set to true, it will I<append> all uncompressed 1350data to the output data stream. 1351 1352So when the output is a filehandle it will carry out a seek to the eof 1353before writing any uncompressed data. If the output is a filename, it will be opened for 1354appending. If the output is a buffer, all uncompressed data will be 1355appended to the existing buffer. 1356 1357Conversely when C<Append> is not specified, or it is present and is set to 1358false, it will operate as follows. 1359 1360When the output is a filename, it will truncate the contents of the file 1361before writing any uncompressed data. If the output is a filehandle 1362its position will not be changed. If the output is a buffer, it will be 1363wiped before any uncompressed data is output. 1364 1365Defaults to 0. 1366 1367=item C<< MultiStream => 0|1 >> 1368 1369If the input file/buffer contains multiple compressed data streams, this 1370option will uncompress the whole lot as a single data stream.
1371 1372Defaults to 0. 1373 1374=item C<< TrailingData => $scalar >> 1375 1376Returns the data, if any, that is present immediately after the compressed 1377data stream once uncompression is complete. 1378 1379This option can be used when there is useful information immediately 1380following the compressed data stream, and you don't know the length of the 1381compressed data stream. 1382 1383If the input is a buffer, C<trailingData> will return everything from the 1384end of the compressed data stream to the end of the buffer. 1385 1386If the input is a filehandle, C<trailingData> will return the data that is 1387left in the filehandle input buffer once the end of the compressed data 1388stream has been reached. You can then use the filehandle to read the rest 1389of the input file. 1390 1391Don't bother using C<trailingData> if the input is a filename. 1392 1393If you know the length of the compressed data stream before you start 1394uncompressing, you can avoid having to use C<trailingData> by setting the 1395C<InputLength> option. 1396 1397=back 1398 1399=head2 Examples 1400 1401Say you have a zip file, C<file1.zip>, that only contains a 1402single member, you can read it and write the uncompressed data to the 1403file C<file1.txt> like this. 
1404 1405 use strict ; 1406 use warnings ; 1407 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1408 1409 my $input = "file1.zip"; 1410 my $output = "file1.txt"; 1411 unzip $input => $output 1412 or die "unzip failed: $UnzipError\n"; 1413 1414If you have a zip file that contains multiple members and want to read a 1415specific member from the file, say C<"data1">, use the C<Name> option 1416 1417 use strict ; 1418 use warnings ; 1419 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1420 1421 my $input = "file1.zip"; 1422 my $output = "file1.txt"; 1423 unzip $input => $output, Name => "data1" 1424 or die "unzip failed: $UnzipError\n"; 1425 1426Alternatively, if you want to read the C<"data1"> member into memory, use 1427a scalar reference for the C<output> parameter. 1428 1429 use strict ; 1430 use warnings ; 1431 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1432 1433 my $input = "file1.zip"; 1434 my $output ; 1435 unzip $input => \$output, Name => "data1" 1436 or die "unzip failed: $UnzipError\n"; 1437 # $output now contains the uncompressed data 1438 1439To read from an existing Perl filehandle, C<$input>, and write the 1440uncompressed data to a buffer, C<$buffer>. 1441 1442 use strict ; 1443 use warnings ; 1444 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1445 use IO::File ; 1446 1447 my $input = IO::File->new( "<file1.zip" ) 1448 or die "Cannot open 'file1.zip': $!\n" ; 1449 my $buffer ; 1450 unzip $input => \$buffer 1451 or die "unzip failed: $UnzipError\n"; 1452 1453=head1 OO Interface 1454 1455=head2 Constructor 1456 1457The format of the constructor for IO::Uncompress::Unzip is shown below 1458 1459 my $z = IO::Uncompress::Unzip->new( $input [OPTS] ) 1460 or die "IO::Uncompress::Unzip failed: $UnzipError\n"; 1461 1462Returns an C<IO::Uncompress::Unzip> object on success and undef on failure. 1463The variable C<$UnzipError> will contain an error message on failure. 
1464 1465If you are running Perl 5.005 or better the object, C<$z>, returned from 1466IO::Uncompress::Unzip can be used exactly like an L<IO::File|IO::File> filehandle. 1467This means that all normal input file operations can be carried out with 1468C<$z>. For example, to read a line from a compressed file/buffer you can 1469use either of these forms 1470 1471 $line = $z->getline(); 1472 $line = <$z>; 1473 1474The mandatory parameter C<$input> is used to determine the source of the 1475compressed data. This parameter can take one of three forms. 1476 1477=over 5 1478 1479=item A filename 1480 1481If the C<$input> parameter is a scalar, it is assumed to be a filename. This 1482file will be opened for reading and the compressed data will be read from it. 1483 1484=item A filehandle 1485 1486If the C<$input> parameter is a filehandle, the compressed data will be 1487read from it. 1488The string '-' can be used as an alias for standard input. 1489 1490=item A scalar reference 1491 1492If C<$input> is a scalar reference, the compressed data will be read from 1493C<$$input>. 1494 1495=back 1496 1497=head2 Constructor Options 1498 1499The option names defined below are case insensitive and can be optionally 1500prefixed by a '-'. So all of the following are valid 1501 1502 -AutoClose 1503 -autoclose 1504 AUTOCLOSE 1505 autoclose 1506 1507OPTS is a combination of the following options: 1508 1509=over 5 1510 1511=item C<< Name => "membername" >> 1512 1513Open "membername" from the zip file for reading. 1514 1515=item C<< Efs => 0| 1 >> 1516 1517When this option is set to true AND the zip archive being read has 1518the "Language Encoding Flag" (EFS) set, the member name is assumed to be encoded in UTF-8. 1519 1520If the member name in the zip archive is not valid UTF-8 when this option is true, 1521the script will die with an error message. 1522 1523Note that this option only works with Perl 5.8.4 or better. 1524 1525This option defaults to B<false>.
1526 1527=item C<< AutoClose => 0|1 >> 1528 1529This option is only valid when the C<$input> parameter is a filehandle. If 1530specified, and the value is true, it will result in the file being closed once 1531either the C<close> method is called or the IO::Uncompress::Unzip object is 1532destroyed. 1533 1534This parameter defaults to 0. 1535 1536=item C<< MultiStream => 0|1 >> 1537 1538Treats the complete zip file/buffer as a single compressed data 1539stream. When reading in multi-stream mode each member of the zip 1540file/buffer will be uncompressed in turn until the end of the file/buffer 1541is encountered. 1542 1543This parameter defaults to 0. 1544 1545=item C<< Prime => $string >> 1546 1547This option will uncompress the contents of C<$string> before processing the 1548input file/buffer. 1549 1550This option can be useful when the compressed data is embedded in another 1551file/data structure and it is not possible to work out where the compressed 1552data begins without having to read the first few bytes. If this is the 1553case, the uncompression can be I<primed> with these bytes using this 1554option. 1555 1556=item C<< Transparent => 0|1 >> 1557 1558If this option is set and the input file/buffer is not compressed data, 1559the module will allow reading of it anyway. 1560 1561In addition, if the input file/buffer does contain compressed data and 1562there is non-compressed data immediately following it, setting this option 1563will make this module treat the whole file/buffer as a single data stream. 1564 1565This option defaults to 1. 1566 1567=item C<< BlockSize => $num >> 1568 1569When reading the compressed input data, IO::Uncompress::Unzip will read it in 1570blocks of C<$num> bytes. 1571 1572This option defaults to 4096. 1573 1574=item C<< InputLength => $size >> 1575 1576When present this option will limit the number of compressed bytes read 1577from the input file/buffer to C<$size>. 
This option can be used in the 1578situation where there is useful data directly after the compressed data 1579stream and you know beforehand the exact length of the compressed data 1580stream. 1581 1582This option is mostly used when reading from a filehandle, in which case 1583the file pointer will be left pointing to the first byte directly after the 1584compressed data stream. 1585 1586This option defaults to off. 1587 1588=item C<< Append => 0|1 >> 1589 1590This option controls what the C<read> method does with uncompressed data. 1591 1592If set to 1, all uncompressed data will be appended to the output parameter 1593of the C<read> method. 1594 1595If set to 0, the contents of the output parameter of the C<read> method 1596will be overwritten by the uncompressed data. 1597 1598Defaults to 0. 1599 1600=item C<< Strict => 0|1 >> 1601 1602This option controls whether the extra checks defined below are used when 1603carrying out the decompression. When Strict is on, the extra tests are 1604carried out, when Strict is off they are not. 1605 1606The default for this option is off. 1607 1608=back 1609 1610=head2 Examples 1611 1612TODO 1613 1614=head1 Methods 1615 1616=head2 read 1617 1618Usage is 1619 1620 $status = $z->read($buffer) 1621 1622Reads a block of compressed data (the size of the compressed block is 1623determined by the C<BlockSize> option in the constructor), uncompresses it and 1624writes any uncompressed data into C<$buffer>. If the C<Append> parameter is 1625set in the constructor, the uncompressed data will be appended to the 1626C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. 1627 1628Returns the number of uncompressed bytes written to C<$buffer>, zero if eof 1629or a negative number on error.
1630 1631=head2 read 1632 1633Usage is 1634 1635 $status = $z->read($buffer, $length) 1636 $status = $z->read($buffer, $length, $offset) 1637 1638 $status = read($z, $buffer, $length) 1639 $status = read($z, $buffer, $length, $offset) 1640 1641Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. 1642 1643The main difference between this form of the C<read> method and the 1644previous one, is that this one will attempt to return I<exactly> C<$length> 1645bytes. The only circumstances in which this function will not do so are when end-of-file 1646or an IO error is encountered. 1647 1648Returns the number of uncompressed bytes written to C<$buffer>, zero if eof 1649or a negative number on error. 1650 1651=head2 getline 1652 1653Usage is 1654 1655 $line = $z->getline() 1656 $line = <$z> 1657 1658Reads a single line. 1659 1660This method fully supports the use of the variable C<$/> (or 1661C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to 1662determine what constitutes an end of line. Paragraph mode, record mode and 1663file slurp mode are all supported. 1664 1665=head2 getc 1666 1667Usage is 1668 1669 $char = $z->getc() 1670 1671Read a single character. 1672 1673=head2 ungetc 1674 1675Usage is 1676 1677 $char = $z->ungetc($string) 1678 1679=head2 inflateSync 1680 1681Usage is 1682 1683 $status = $z->inflateSync() 1684 1685TODO 1686 1687=head2 getHeaderInfo 1688 1689Usage is 1690 1691 $hdr = $z->getHeaderInfo(); 1692 @hdrs = $z->getHeaderInfo(); 1693 1694This method returns either a hash reference (in scalar context) or a list 1695of hash references (in array context) that contains information about each 1696of the header fields in the compressed data stream(s). 1697 1698=head2 tell 1699 1700Usage is 1701 1702 $z->tell() 1703 tell $z 1704 1705Returns the uncompressed file offset. 1706 1707=head2 eof 1708 1709Usage is 1710 1711 $z->eof(); 1712 eof($z); 1713 1714Returns true if the end of the compressed input stream has been reached.
1715 1716=head2 seek 1717 1718 $z->seek($position, $whence); 1719 seek($z, $position, $whence); 1720 1721Provides a sub-set of the C<seek> functionality, with the restriction 1722that it is only legal to seek forward in the input file/buffer. 1723It is a fatal error to attempt to seek backward. 1724 1725Note that the implementation of C<seek> in this module does not provide 1726true random access to a compressed file/buffer. It works by uncompressing 1727data from the current offset in the file/buffer until it reaches the 1728uncompressed offset specified in the parameters to C<seek>. For very small 1729files this may be acceptable behaviour. For large files it may cause an 1730unacceptable delay. 1731 1732The C<$whence> parameter takes one of the usual values, namely SEEK_SET, 1733SEEK_CUR or SEEK_END. 1734 1735Returns 1 on success, 0 on failure. 1736 1737=head2 binmode 1738 1739Usage is 1740 1741 $z->binmode 1742 binmode $z ; 1743 1744This is a noop provided for completeness. 1745 1746=head2 opened 1747 1748 $z->opened() 1749 1750Returns true if the object currently refers to an opened file/buffer. 1751 1752=head2 autoflush 1753 1754 my $prev = $z->autoflush() 1755 my $prev = $z->autoflush(EXPR) 1756 1757If the C<$z> object is associated with a file or a filehandle, this method 1758returns the current autoflush setting for the underlying filehandle. If 1759C<EXPR> is present, and is non-zero, it will enable flushing after every 1760write/print operation. 1761 1762If C<$z> is associated with a buffer, this method has no effect and always 1763returns C<undef>. 1764 1765B<Note> that the special variable C<$|> B<cannot> be used to set or 1766retrieve the autoflush setting. 1767 1768=head2 input_line_number 1769 1770 $z->input_line_number() 1771 $z->input_line_number(EXPR) 1772 1773Returns the current uncompressed line number. If C<EXPR> is present it has 1774the effect of setting the line number.
Note that setting the line number 1775does not change the current position within the file/buffer being read. 1776 1777The contents of C<$/> are used to determine what constitutes a line 1778terminator. 1779 1780=head2 fileno 1781 1782 $z->fileno() 1783 fileno($z) 1784 1785If the C<$z> object is associated with a file or a filehandle, C<fileno> 1786will return the underlying file descriptor. Once the C<close> method is 1787called C<fileno> will return C<undef>. 1788 1789If the C<$z> object is associated with a buffer, this method will return 1790C<undef>. 1791 1792=head2 close 1793 1794 $z->close() ; 1795 close $z ; 1796 1797Closes the input file/buffer. 1798 1799For most versions of Perl this method will be automatically invoked if 1800the IO::Uncompress::Unzip object is destroyed (either explicitly or by the 1801variable with the reference to the object going out of scope). The 1802exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In 1803these cases, the C<close> method will be called automatically, but 1804not until global destruction of all live objects when the program is 1805terminating. 1806 1807Therefore, if you want your scripts to be able to run on all versions 1808of Perl, you should call C<close> explicitly and not rely on automatic 1809closing. 1810 1811Returns true on success, otherwise 0. 1812 1813If the C<AutoClose> option has been enabled when the IO::Uncompress::Unzip 1814object was created, and the object is associated with a file, the 1815underlying file will also be closed. 1816 1817=head2 nextStream 1818 1819Usage is 1820 1821 my $status = $z->nextStream(); 1822 1823Skips to the next compressed data stream in the input file/buffer. If a new 1824compressed data stream is found, the eof marker will be cleared and C<$.> 1825will be reset to 0.
1826 1827If trailing data is present immediately after the zip archive and the 1828C<Transparent> option is enabled, this method will consider that trailing 1829data to be another member of the zip archive. 1830 1831Returns 1 if a new stream was found, 0 if none was found, and -1 if an 1832error was encountered. 1833 1834=head2 trailingData 1835 1836Usage is 1837 1838 my $data = $z->trailingData(); 1839 1840Returns the data, if any, that is present immediately after the compressed 1841data stream once uncompression is complete. It only makes sense to call 1842this method once the end of the compressed data stream has been 1843encountered. 1844 1845This option can be used when there is useful information immediately 1846following the compressed data stream, and you don't know the length of the 1847compressed data stream. 1848 1849If the input is a buffer, C<trailingData> will return everything from the 1850end of the compressed data stream to the end of the buffer. 1851 1852If the input is a filehandle, C<trailingData> will return the data that is 1853left in the filehandle input buffer once the end of the compressed data 1854stream has been reached. You can then use the filehandle to read the rest 1855of the input file. 1856 1857Don't bother using C<trailingData> if the input is a filename. 1858 1859If you know the length of the compressed data stream before you start 1860uncompressing, you can avoid having to use C<trailingData> by setting the 1861C<InputLength> option in the constructor. 1862 1863=head1 Importing 1864 1865No symbolic constants are required by IO::Uncompress::Unzip at present. 1866 1867=over 5 1868 1869=item :all 1870 1871Imports C<unzip> and C<$UnzipError>. 
1872Same as doing this 1873 1874 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1875 1876=back 1877 1878=head1 EXAMPLES 1879 1880=head2 Working with Net::FTP 1881 1882See L<IO::Compress::FAQ|IO::Compress::FAQ/"Compressed files and Net::FTP"> 1883 1884=head2 Walking through a zip file 1885 1886The code below can be used to traverse a zip file, one compressed data 1887stream at a time. 1888 1889 use IO::Uncompress::Unzip qw($UnzipError); 1890 1891 my $zipfile = "somefile.zip"; 1892 my $u = IO::Uncompress::Unzip->new( $zipfile ) 1893 or die "Cannot open $zipfile: $UnzipError"; 1894 1895 my $status; 1896 for ($status = 1; $status > 0; $status = $u->nextStream()) 1897 { 1898 1899 my $name = $u->getHeaderInfo()->{Name}; 1900 warn "Processing member $name\n" ; 1901 1902 my $buff; 1903 while (($status = $u->read($buff)) > 0) { 1904 # Do something here 1905 } 1906 1907 last if $status < 0; 1908 } 1909 1910 die "Error processing $zipfile: $!\n" 1911 if $status < 0 ; 1912 1913Each individual compressed data stream is read until the logical 1914end-of-file is reached. Then C<nextStream> is called. This will skip to the 1915start of the next compressed data stream and clear the end-of-file flag. 1916 1917It is also worth noting that C<nextStream> can be called at any time -- you 1918don't have to wait until you have exhausted a compressed data stream before 1919skipping to the next one. 1920 1921=head2 Unzipping a complete zip file to disk 1922 1923Daniel S. Sterling has written a script that uses C<IO::Uncompress::Unzip> 1924to read a zip file and unzip its contents to disk. 1925 1926The script is available from L<https://gist.github.com/eqhmcow/5389877> 1927 1928=head1 SUPPORT 1929 1930General feedback/questions/bug reports should be sent to 1931L<https://github.com/pmqs/IO-Compress/issues> (preferred) or 1932L<https://rt.cpan.org/Public/Dist/Display.html?Name=IO-Compress>.
1933 1934=head1 SEE ALSO 1935 1936L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzma>, L<IO::Uncompress::UnLzma>, L<IO::Compress::Xz>, L<IO::Uncompress::UnXz>, L<IO::Compress::Lzip>, L<IO::Uncompress::UnLzip>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Compress::Zstd>, L<IO::Uncompress::UnZstd>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress> 1937 1938L<IO::Compress::FAQ|IO::Compress::FAQ> 1939 1940L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>, 1941L<Archive::Tar|Archive::Tar>, 1942L<IO::Zlib|IO::Zlib> 1943 1944For RFC 1950, 1951 and 1952 see 1945L<https://datatracker.ietf.org/doc/html/rfc1950>, 1946L<https://datatracker.ietf.org/doc/html/rfc1951> and 1947L<https://datatracker.ietf.org/doc/html/rfc1952> 1948 1949The I<zlib> compression library was written by Jean-loup Gailly 1950C<gzip@prep.ai.mit.edu> and Mark Adler C<madler@alumni.caltech.edu>. 1951 1952The primary site for the I<zlib> compression library is 1953L<http://www.zlib.org>. 1954 1955The primary site for the I<zlib-ng> compression library is 1956L<https://github.com/zlib-ng/zlib-ng>. 1957 1958The primary site for gzip is L<http://www.gzip.org>. 1959 1960=head1 AUTHOR 1961 1962This module was written by Paul Marquess, C<pmqs@cpan.org>. 1963 1964=head1 MODIFICATION HISTORY 1965 1966See the Changes file. 1967 1968=head1 COPYRIGHT AND LICENSE 1969 1970Copyright (c) 2005-2023 Paul Marquess. All rights reserved. 1971 1972This program is free software; you can redistribute it and/or 1973modify it under the same terms as Perl itself. 1974