package IO::Uncompress::Unzip;

require 5.006 ;

# Reader for zip archives: parses local file headers, data descriptors
# and the central directory records that follow the member data.

use strict ;
use warnings;
use bytes;

use IO::File;
use Fcntl qw(SEEK_CUR);
use POSIX 'mktime';

use IO::Uncompress::RawInflate  2.093 ;
use IO::Compress::Base::Common  2.093 qw(:Status );
use IO::Uncompress::Adapter::Inflate  2.093 ;
use IO::Uncompress::Adapter::Identity 2.093 ;
use IO::Compress::Zlib::Extra 2.093 ;
use IO::Compress::Zip::Constants 2.093 ;

use Compress::Raw::Zlib  2.093 () ;

BEGIN
{
    # Don't trigger any __DIE__ Hooks.
    local $SIG{__DIE__};

    # Both adapters are optional; their absence is detected later by
    # testing the adapter's $VERSION.
    eval{ require IO::Uncompress::Adapter::Bunzip2 ;
          import  IO::Uncompress::Adapter::Bunzip2 } ;
    eval{ require IO::Uncompress::Adapter::UnLzma ;
          import  IO::Uncompress::Adapter::UnLzma } ;
}


require Exporter ;

our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup);

$VERSION = '2.093';
$UnzipError = '';

@ISA    = qw(IO::Uncompress::RawInflate Exporter);
@EXPORT_OK = qw( $UnzipError unzip );
%EXPORT_TAGS = %IO::Uncompress::RawInflate::EXPORT_TAGS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');

# Maps the signature of each record that may follow the member data to
# the routine that consumes it. Used by chkTrailer.
%headerLookup = (
        ZIP_CENTRAL_HDR_SIG,            \&skipCentralDirectory,
        ZIP_END_CENTRAL_HDR_SIG,        \&skipEndCentralDirectory,
        ZIP64_END_CENTRAL_REC_HDR_SIG,  \&skipCentralDirectory64Rec,
        ZIP64_END_CENTRAL_LOC_HDR_SIG,  \&skipCentralDirectory64Loc,
        ZIP64_ARCHIVE_EXTRA_SIG,        \&skipArchiveExtra,
        ZIP64_DIGITAL_SIGNATURE_SIG,    \&skipDigitalSignature,
        );

# Constructor. Creates the self-tied object (errors are reported through
# $UnzipError) and hands the remaining arguments to _create.
sub new
{
    my $class = shift ;
    my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$UnzipError);
    $obj->_create(undef, 0, @_);
}

# Functional ("one-shot") interface; all arguments are passed to _inf.
sub unzip
{
    my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$UnzipError);
    return $obj->_inf(@_) ;
}

# Returns the option table for the parameters specific to this class:
# option name => [parser type, default value].
sub getExtraParams
{
    return (
#            # Zip header fields
            'name'    => [IO::Compress::Base::Common::Parse_any,       undef],

            'stream'  => [IO::Compress::Base::Common::Parse_boolean,   0],
            'efs'     => [IO::Compress::Base::Common::Parse_boolean,   0],

            # TODO - This means reading the central directory to get
            # 1. the local header offsets
            # 2. The compressed data length
    );
}

# Post-processes the parsed options ($got): forces crc32 on and records
# the 'name' and 'efs' options in the object. Always returns 1.
sub ckParams
{
    my $self = shift ;
    my $got = shift ;

    # unzip always needs crc32
    $got->setValue('crc32' => 1);

    *$self->{UnzipData}{Name} = $got->getValue('name');
    *$self->{UnzipData}{efs} = $got->getValue('efs');

    return 1;
}

# Checks the magic number and reads the member header, storing it in
# {Info}. Returns 1 on success, 0 if the magic check fails and undef if
# no suitable header could be read.
sub mkUncomp
{
    my $self = shift ;
    my $got = shift ;

    my $magic = $self->ckMagic()
        or return 0;

    *$self->{Info} = $self->readHeader($magic)
        or return undef ;

    return 1;
}

# Reads the 4-byte signature at the current position and verifies that
# it is a local file header signature. Returns the raw 4 bytes on
# success, otherwise the result of HeaderError.
sub ckMagic
{
    my $self = shift;

    my $magic ;
    $self->smartReadExact(\$magic, 4);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Minimum header size is " .
                              4 . " bytes")
        if length $magic != 4 ;

    return $self->HeaderError("Bad Magic")
        if ! _isZipMagic($magic) ;

    *$self->{Type} = 'zip';

    return $magic ;
}


# Discards $offset bytes of input by reading them in chunks of at most
# 16 KiB. Returns 1 when all bytes were consumed, 0 if a read failed.
sub fastForward
{
    my $self = shift;
    my $offset = shift;

    # TODO - if Stream isn't enabled & reading from file, use seek

    my $buffer = '';
    my $c = 1024 * 16;

    while ($offset > 0)
    {
        # Clamp the final chunk to the bytes that remain. This used to
        # compare against "length $offset" (the number of digits in the
        # value), which shrank every read to a handful of bytes.
        $c = $offset
            if $offset < $c ;

        $offset -= $c;

        $self->smartReadExact(\$buffer, $c)
            or return 0;
    }

    return 1;
}


# Starting from the local header whose signature ($magic) has already
# been read, returns the header hash of the first member whose name
# equals the 'name' option, or of the very first member when no name
# was given. Members that do not match have their data skipped.
# Returns undef (via saveErrorString or directly) when nothing matches.
sub readHeader
{
    my $self = shift;
    my $magic = shift ;

    my $name =  *$self->{UnzipData}{Name} ;
    my $hdr = $self->_readZipHeader($magic) ;

    while (defined $hdr)
    {
        if (! defined $name || $hdr->{Name} eq $name)
        {
            return $hdr ;
        }

        # skip the data
        # TODO - when Stream is off, use seek
        my $buffer;
        if (*$self->{ZipData}{Streaming}) {
            # The compressed length is not in the header, so the data has
            # to be run through the uncompressor to find where it ends.
            while (1) {

                my $b;
                my $status = $self->smartRead(\$b, 1024 * 16);

                return $self->saveErrorString(undef, "Truncated file")
                    if $status <= 0 ;

                my $temp_buf ;
                my $out;

                $status = *$self->{Uncomp}->uncompr(\$b, \$temp_buf, 0, $out);

                return $self->saveErrorString(undef, *$self->{Uncomp}{Error},
                                                     *$self->{Uncomp}{ErrorNo})
                    if $self->saveStatus($status) == STATUS_ERROR;

                # Whatever is left in $b after uncompr goes back to the input.
                $self->pushBack($b)  ;

                if ($status == STATUS_ENDSTREAM) {
                    *$self->{Uncomp}->reset();
                    last;
                }
            }

            # skip the trailer
            $self->smartReadExact(\$buffer, $hdr->{TrailerLength})
                or return $self->saveErrorString(undef, "Truncated file");
        }
        else {
            my $c = $hdr->{CompressedLength}->get64bit();
            $self->fastForward($c)
                or return $self->saveErrorString(undef, "Truncated file");
            $buffer = '';
        }

        $self->chkTrailer($buffer) == STATUS_OK
            or return $self->saveErrorString(undef, "Truncated file");

        $hdr = $self->_readFullZipHeader();

        return $self->saveErrorString(undef, "Cannot find '$name'")
            if $self->smartEof();
    }

    return undef;
}

# Validates the data descriptor ($trailer) of a streamed member, or the
# sizes/CRC taken from the local header otherwise, then consumes any
# central-directory style records that follow.
#
# Returns STATUS_OK when another local header or the end of central
# directory record was reached, STATUS_EOF at end of input, and
# STATUS_ERROR (or the result of TrailerError/saveErrorString) otherwise.
sub chkTrailer
{
    my $self = shift;
    my $trailer = shift;

    my ($sig, $CRC32, $cSize, $uSize) ;
    if (*$self->{ZipData}{Streaming}) {
        $sig   = unpack ("V", substr($trailer, 0, 4));
        $CRC32 = unpack ("V", substr($trailer, 4, 4));

        if (*$self->{ZipData}{Zip64} ) {
            $cSize = U64::newUnpack_V64(substr($trailer,  8, 8));
            $uSize = U64::newUnpack_V64(substr($trailer, 16, 8));
        }
        else {
            $cSize = U64::newUnpack_V32(substr($trailer,  8, 4));
            $uSize = U64::newUnpack_V32(substr($trailer, 12, 4));
        }

        return $self->TrailerError("Data Descriptor signature, got $sig")
            if $sig != ZIP_DATA_HDR_SIG;
    }
    else {
        ($CRC32, $cSize, $uSize) =
            (*$self->{ZipData}{Crc32},
             *$self->{ZipData}{CompressedLen},
             *$self->{ZipData}{UnCompressedLen});
    }

    *$self->{Info}{CRC32} = *$self->{ZipData}{CRC32} ;
    *$self->{Info}{CompressedLength} = $cSize->get64bit();
    *$self->{Info}{UncompressedLength} = $uSize->get64bit();

    if (*$self->{Strict}) {
        return $self->TrailerError("CRC mismatch")
            if $CRC32  != *$self->{ZipData}{CRC32} ;

        return $self->TrailerError("CSIZE mismatch.")
            if ! $cSize->equal(*$self->{CompSize});

        return $self->TrailerError("USIZE mismatch.")
            if ! $uSize->equal(*$self->{UnCompSize});
    }

    # check for central directory or end of central directory
    while (1)
    {
        my $magic ;
        my $got = $self->smartRead(\$magic, 4);

        return $self->saveErrorString(STATUS_ERROR, "Truncated file")
            if $got != 4 && *$self->{Strict};

        if ($got == 0) {
            return STATUS_EOF ;
        }
        elsif ($got < 0) {
            return STATUS_ERROR ;
        }
        elsif ($got < 4) {
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }

        my $sig = unpack("V", $magic) ;

        if (my $handler = $headerLookup{$sig})
        {
            if ($handler->($self, $magic) != STATUS_OK ) {
                if (*$self->{Strict}) {
                    return STATUS_ERROR ;
                }
                else {
                    $self->clearError();
                    return STATUS_OK ;
                }
            }

            return STATUS_OK
                if $sig == ZIP_END_CENTRAL_HDR_SIG ;
        }
        elsif ($sig == ZIP_LOCAL_HDR_SIG)
        {
            # Start of the next member; leave it for the header reader.
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }
        else
        {
            # Unknown signature: put the data back and give up.
            $self->pushBack($magic)  ;
            last;
        }
    }

    return STATUS_ERROR ;
}

# Consumes one central directory file header (46 fixed bytes, of which
# the 4-byte signature $magic has already been read) together with its
# filename, extra field and comment. Returns STATUS_OK on success.
sub skipCentralDirectory
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 46 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     46 . " bytes") ;

    *$self->{HeaderPending} = $magic . $buffer ;

    # Field offsets are relative to the start of the record; the "-4"
    # accounts for the signature that is not part of $buffer.
    my $filename_length = unpack ("v", substr($buffer, 28-4, 2));
    my $extra_length    = unpack ("v", substr($buffer, 30-4, 2));
    my $comment_length  = unpack ("v", substr($buffer, 32-4, 2));

    my $filename;
    my $extraField;
    my $comment ;
    if ($filename_length)
    {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedTrailer("filename");
    }

    if ($extra_length)
    {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedTrailer("extra");
    }

    if ($comment_length)
    {
        $self->smartReadExact(\$comment, $comment_length)
            or return $self->TruncatedTrailer("comment");
    }

    return STATUS_OK ;
}

# Consumes an archive extra data record: a 4-byte length followed by
# that many bytes. The whole record is kept in {HeaderPending}.
# Returns STATUS_OK on success.
sub skipArchiveExtra
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 4)
        or return $self->TrailerError("Minimum header size is " .
                                     4 . " bytes") ;

    my $keep = $magic . $buffer ;

    my $size = unpack ("V", $buffer);

    $self->smartReadExact(\$buffer, $size)
        or return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;

    $keep .= $buffer ;
    *$self->{HeaderPending} = $keep ;

    return STATUS_OK ;
}


# Consumes a Zip64 end of central directory record: an 8-byte
# little-endian size followed by that many bytes, which are discarded.
# Returns STATUS_OK on success.
sub skipCentralDirectory64Rec
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 8)
        or return $self->TrailerError("Minimum header size is " .
                                     8 . " bytes") ;

    # Combine the two 32-bit halves with the same helper offsetFromZip64
    # uses. The former "$sizeHi * U64::MAX32 + $sizeLo" multiplied by
    # 0xFFFFFFFF rather than 2**32 and so came up short by $sizeHi.
    my $size = U64::Value_VV64($buffer);

    $self->fastForward($size)
        or return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;

    return STATUS_OK ;
}

# Consumes a Zip64 end of central directory locator (20 bytes including
# the signature). Returns STATUS_OK on success.
sub skipCentralDirectory64Loc
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 20 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     20 . " bytes") ;

    *$self->{HeaderPending} = $magic . $buffer ;

    return STATUS_OK ;
}

# Consumes the end of central directory record (22 fixed bytes including
# the signature) and its trailing comment. Returns STATUS_OK on success.
sub skipEndCentralDirectory
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 22 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     22 . " bytes") ;

    *$self->{HeaderPending} = $magic . $buffer ;

    my $comment_length = unpack ("v", substr($buffer, 20-4, 2));

    my $comment ;
    if ($comment_length)
    {
        $self->smartReadExact(\$comment, $comment_length)
            or return $self->TruncatedTrailer("comment");
    }

    return STATUS_OK ;
}


# True when $buffer starts with the local file header signature.
# Buffers shorter than 4 bytes yield 0.
sub _isZipMagic
{
    my $buffer = shift ;
    return 0 if length $buffer < 4 ;
    my $sig = unpack("V", $buffer) ;
    return $sig == ZIP_LOCAL_HDR_SIG ;
}


# Reads a complete local file header, signature included, from the
# current position. Returns the header hash from _readZipHeader, or the
# result of HeaderError when the signature is missing or wrong.
sub _readFullZipHeader($)
{
    my ($self) = @_ ;
    my $magic = '' ;

    $self->smartReadExact(\$magic, 4);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Minimum header size is " .
                              30 . " bytes")
        if length $magic != 4 ;


    return $self->HeaderError("Bad Magic")
        if ! _isZipMagic($magic) ;

    my $status = $self->_readZipHeader($magic);
    delete *$self->{Transparent} if ! defined $status ;
    return $status ;
}

# Parses the remainder of a local file header whose 4-byte signature
# ($magic) has already been consumed, sets up the matching uncompression
# adapter in {Uncomp} and records the member's properties in {ZipData}.
#
# Returns a hash reference describing the member, or the result of
# HeaderError/TruncatedHeader/saveErrorString on failure. Croaks if the
# 'efs' option is on, the header's language-encoding flag is set and the
# filename is not valid UTF-8.
sub _readZipHeader($)
{
    my ($self, $magic) = @_ ;
    my ($buffer) = '' ;

    $self->smartReadExact(\$buffer, 30 - 4)
        or return $self->HeaderError("Minimum header size is " .
                                     30 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    # Field offsets are relative to the start of the record; the "-4"
    # accounts for the signature that is not part of $buffer.
    my $extractVersion     = unpack ("v", substr($buffer, 4-4,  2));
    my $gpFlag             = unpack ("v", substr($buffer, 6-4,  2));
    my $compressedMethod   = unpack ("v", substr($buffer, 8-4,  2));
    my $lastModTime        = unpack ("V", substr($buffer, 10-4, 4));
    my $crc32              = unpack ("V", substr($buffer, 14-4, 4));
    my $compressedLength   = U64::newUnpack_V32(substr($buffer, 18-4, 4));
    my $uncompressedLength = U64::newUnpack_V32(substr($buffer, 22-4, 4));
    my $filename_length    = unpack ("v", substr($buffer, 26-4, 2));
    my $extra_length       = unpack ("v", substr($buffer, 28-4, 2));

    my $filename;
    my $extraField;
    my @EXTRA = ();
    my $streamingMode = ($gpFlag & ZIP_GP_FLAG_STREAMING_MASK) ? 1 : 0 ;
    my $efs_flag = ($gpFlag & ZIP_GP_FLAG_LANGUAGE_ENCODING) ? 1 : 0;

    return $self->HeaderError("Encrypted content not supported")
        if $gpFlag & (ZIP_GP_FLAG_ENCRYPTED_MASK|ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK);

    return $self->HeaderError("Patch content not supported")
        if $gpFlag & ZIP_GP_FLAG_PATCHED_MASK;

    *$self->{ZipData}{Streaming} = $streamingMode;


    if ($filename_length)
    {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedHeader("Filename");

        if (*$self->{UnzipData}{efs} && $efs_flag && $] >= 5.008004)
        {
            require Encode;
            require Carp;

            # The eval block ends with an explicit true value so that a
            # name which decodes to a false string (e.g. "0") is not
            # mistaken for a decoding failure.
            eval { $filename = Encode::decode_utf8($filename, 1); 1 }
                or Carp::croak("Zip Filename not UTF-8") ;
        }

        $keep .= $filename ;
    }

    my $zip64 = 0 ;

    if ($extra_length)
    {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedHeader("Extra Field");

        my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField,
                                                           \@EXTRA, 1, 0);
        return $self->HeaderError($bad)
            if defined $bad;

        $keep .= $extraField ;

        # Index the sub-fields by ID; a later duplicate ID wins.
        my %Extra ;
        for (@EXTRA)
        {
            $Extra{$_->[0]} = \$_->[1];
        }

        if (defined $Extra{ZIP_EXTRA_ID_ZIP64()})
        {
            $zip64 = 1 ;

            my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} };

            # This code assumes that all the fields in the Zip64
            # extra field aren't necessarily present. The spec says that
            # they only exist if the equivalent local headers are -1.

            if (! $streamingMode) {
                my $offset = 0 ;

                if (U64::full32($uncompressedLength->get32bit()) ) {
                    $uncompressedLength
                            = U64::newUnpack_V64(substr($buff, 0, 8));

                    $offset += 8 ;
                }

                if (U64::full32($compressedLength->get32bit()) ) {

                    $compressedLength
                        = U64::newUnpack_V64(substr($buff, $offset, 8));

                    $offset += 8 ;
                }
            }
        }
    }

    *$self->{ZipData}{Zip64} = $zip64;

    if (! $streamingMode) {
        *$self->{ZipData}{Streaming} = 0;
        *$self->{ZipData}{Crc32} = $crc32;
        *$self->{ZipData}{CompressedLen} = $compressedLength;
        *$self->{ZipData}{UnCompressedLen} = $uncompressedLength;
        *$self->{CompressedInputLengthRemaining} =
            *$self->{CompressedInputLength} = $compressedLength->get64bit();
    }

    *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(undef);
    *$self->{ZipData}{Method} = $compressedMethod;
    if ($compressedMethod == ZIP_CM_DEFLATE)
    {
        *$self->{Type} = 'zip-deflate';
        my $obj = IO::Uncompress::Adapter::Inflate::mkUncompObject(1,0,0);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_BZIP2)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::Bunzip2::VERSION ;

        *$self->{Type} = 'zip-bzip2';

        my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_LZMA)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnLzma::VERSION ;

        *$self->{Type} = 'zip-lzma';

        # The LZMA data is preceded by a 4-byte header (two version
        # bytes and a 16-bit properties size) and the properties.
        my $LzmaHeader;
        $self->smartReadExact(\$LzmaHeader, 4)
                or return $self->saveErrorString(undef, "Truncated file");
        my ($verHi, $verLo)   = unpack ("CC", substr($LzmaHeader, 0, 2));
        my $LzmaPropertiesSize   = unpack ("v", substr($LzmaHeader, 2, 2));


        my $LzmaPropertyData;
        $self->smartReadExact(\$LzmaPropertyData, $LzmaPropertiesSize)
                or return $self->saveErrorString(undef, "Truncated file");

        if (! $streamingMode) {
            # The bytes just consumed count towards the compressed length.
            *$self->{ZipData}{CompressedLen}->subtract(4 + $LzmaPropertiesSize) ;
            *$self->{CompressedInputLengthRemaining} =
                *$self->{CompressedInputLength} = *$self->{ZipData}{CompressedLen}->get64bit();
        }

        my $obj =
            IO::Uncompress::Adapter::UnLzma::mkUncompZipObject($LzmaPropertyData);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_STORE)
    {
        *$self->{Type} = 'zip-stored';

        my $obj =
        IO::Uncompress::Adapter::Identity::mkUncompObject($streamingMode,
                                                          $zip64);

        *$self->{Uncomp} = $obj;
    }
    else
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod");
    }

    return {
        'Type'               => 'zip',
        'FingerprintLength'  => 4,
        #'HeaderLength'       => $compressedMethod == 8 ? length $keep : 0,
        'HeaderLength'       => length $keep,
        'Zip64'              => $zip64,
        'TrailerLength'      => ! $streamingMode ? 0 : $zip64 ? 24 : 16,
        'Header'             => $keep,
        'CompressedLength'   => $compressedLength ,
        'UncompressedLength' => $uncompressedLength ,
        'CRC32'              => $crc32 ,
        'Name'               => $filename,
        'efs'                => $efs_flag, # language encoding flag
        'Time'               => _dosToUnixTime($lastModTime),
        'Stream'             => $streamingMode,

        'MethodID'           => $compressedMethod,
        'MethodName'         => $compressedMethod == ZIP_CM_DEFLATE
                                 ? "Deflated"
                                 : $compressedMethod == ZIP_CM_BZIP2
                                     ? "Bzip2"
                                     : $compressedMethod == ZIP_CM_LZMA
                                         ? "Lzma"
                                         : $compressedMethod == ZIP_CM_STORE
                                             ? "Stored"
                                             : "Unknown" ,

        'ExtraFieldRaw'      => $extraField,
        'ExtraField'         => [ @EXTRA ],
      }
}

# Updates the running CRC32 in {ZipData}{CRC32}. For deflated members
# the value is taken from the uncompressor; otherwise it is computed
# over the buffer referenced by the first argument after $self, with
# the second argument passed through to crc32.
sub filterUncompressed
{
    my $self = shift ;

    if (*$self->{ZipData}{Method} == ZIP_CM_DEFLATE) {
        *$self->{ZipData}{CRC32} = *$self->{Uncomp}->crc32() ;
    }
    else {
        *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(${$_[0]}, *$self->{ZipData}{CRC32}, $_[1]);
    }
}


# from Archive::Zip & info-zip
#
# Converts a packed 32-bit DOS date/time value into a time_t using
# mktime (i.e. interpreted in the local time zone). Returns 0 when
# mktime cannot represent the value.
sub _dosToUnixTime
{
    my $dt = shift;

    my $year = ( ( $dt >> 25 ) & 0x7f ) + 80;
    my $mon  = ( ( $dt >> 21 ) & 0x0f ) - 1;
    my $mday = ( ( $dt >> 16 ) & 0x1f );

    my $hour = ( ( $dt >> 11 ) & 0x1f );
    my $min  = ( ( $dt >> 5 ) & 0x3f );
    my $sec  = ( ( $dt << 1 ) & 0x3e );   # stored with 2-second resolution

    my $time_t = mktime( $sec, $min, $hour, $mday, $mon, $year, 0, 0, -1 );
    return 0 if ! defined $time_t;
    return $time_t;
}


# Moves the input position forward by $size bytes. $size may be a plain
# number or a U64 object.
sub skip
{
    my $self = shift;
    my $size = shift;

    my $distance = ref $size eq 'U64' ? $size->get64bit() : $size ;

    # Every other smartSeek call in this file passes (offset, 0, whence);
    # the two-argument form used here before put SEEK_CUR in the second
    # slot. NOTE(review): confirm against the smartSeek implementation
    # in the base class.
    return $self->smartSeek($distance, 0, SEEK_CUR);
}


# Walks the central directory and returns a list with one U64 object per
# entry holding that entry's compressed size. The input position is
# restored afterwards. Returns an empty list when the central directory
# cannot be located.
sub scanCentralDirectory
{
    my $self = shift;

    my $here = $self->tell();

    # Use cases
    # 1 32-bit CD
    # 2 64-bit CD

    my @CD = ();
    my $offset = $self->findCentralDirectoryOffset();

    return ()
        if ! defined $offset;

    $self->smartSeek($offset, 0, SEEK_SET) ;

    # Now walk the Central Directory Records
    my $buffer ;
    while ($self->smartReadExact(\$buffer, 46) &&
           unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) {

        my $compressedLength   = unpack("V", substr($buffer, 20, 4));
        my $uncompressedLength = unpack("V", substr($buffer, 24, 4));
        my $filename_length    = unpack("v", substr($buffer, 28, 2));
        my $extra_length       = unpack("v", substr($buffer, 30, 2));
        my $comment_length     = unpack("v", substr($buffer, 32, 2));

        $self->skip($filename_length ) ;

        my $v64 = U64->new($compressedLength) ;

        if (U64::full32($compressedLength) ) {
            # The real size lives in the Zip64 extra field.
            $self->smartReadExact(\$buffer, $extra_length) ;
            die "Truncated central directory extra field: wanted $extra_length bytes, got " . length($buffer)
                if length($buffer) != $extra_length;
            my $got = $self->get64Extra($buffer, U64::full32($uncompressedLength));

            # If not Zip64 extra field, assume size is 0xFFFFFFFF
            $v64 = $got if defined $got;
        }
        else {
            $self->skip($extra_length) ;
        }

        $self->skip($comment_length ) ;

        push @CD, $v64 ;
    }

    $self->smartSeek($here, 0, SEEK_SET) ;

    return @CD;
}

# Looks up the Zip64 extra field (ID 0x0001) in the raw extra data
# $buffer and returns a 64-bit value from it as a U64 object: the one at
# offset 8 when $is_uncomp is true, otherwise the one at offset 0.
# Returns undef when the field is absent.
sub get64Extra
{
    my $self = shift ;

    my $buffer = shift;
    my $is_uncomp = shift ;

    my $extra = IO::Compress::Zlib::Extra::findID(0x0001, $buffer);

    return undef
        if ! defined $extra;

    return U64::newUnpack_V64(substr($extra,  $is_uncomp ? 8 : 0)) ;
}

# Given the offset ($here) of the end of central directory record,
# reads the Zip64 locator stored in the 20 bytes before it, follows it
# to the Zip64 end of central directory record and returns the central
# directory offset recorded there. Dies if either record is missing or
# cannot be read.
sub offsetFromZip64
{
    my $self = shift ;
    my $here = shift;

    $self->smartSeek($here - 20, 0, SEEK_SET)
        or die "Cannot seek to Zip64 end of central directory locator: $!" ;

    my $buffer;
    $self->smartReadExact(\$buffer, 20)
        or die "Cannot read Zip64 end of central directory locator before offset $here: $!" ;

    die "Zip64 end of central directory locator not found"
        if unpack("V", $buffer) != ZIP64_END_CENTRAL_LOC_HDR_SIG ;

    my $recordOffset = U64::Value_VV64(substr($buffer, 8, 8));

    $self->smartSeek($recordOffset, 0, SEEK_SET) ;

    $self->smartReadExact(\$buffer, 4)
        or die "Cannot read Zip64 end of central directory record signature" ;

    die "Zip64 end of central directory record not found"
        if unpack("V", $buffer) != ZIP64_END_CENTRAL_REC_HDR_SIG ;

    $self->smartReadExact(\$buffer, 8)
        or die "Cannot read Zip64 end of central directory record size" ;
    my $size = U64::Value_VV64($buffer);
    $self->smartReadExact(\$buffer, $size)
        or die "Cannot read Zip64 end of central directory record" ;

    # Offset 36 within the record body holds the central directory offset.
    return U64::Value_VV64(substr($buffer,  36, 8));
}

use constant Pack_ZIP_END_CENTRAL_HDR_SIG => pack("V", ZIP_END_CENTRAL_HDR_SIG);

# Locates the end of central directory record and returns the offset of
# the central directory. Returns undef when no such record is found;
# dies when the input cannot be read or seeked.
sub findCentralDirectoryOffset
{
    my $self = shift ;

    # Most common use-case is where there is no comment, so
    # know exactly where the end of central directory record
    # should be.

    $self->smartSeek(-22, 0, SEEK_END) ;
    my $here = $self->tell();

    my $buffer;
    $self->smartReadExact(\$buffer, 22)
        or die "Cannot read end of central directory record" ;

    my $centralDirOffset ;
    if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) {
        $centralDirOffset = unpack("V", substr($buffer, 16,  4));
    }
    else {
        # A comment follows the record; scan backwards from the end of
        # the input, 1 KiB further on each pass, for the signature.
        $self->smartSeek(0, 0, SEEK_END) ;

        my $fileLen = $self->tell();
        my $want = 0 ;

        while(1) {
            $want += 1024;
            my $seekTo = $fileLen - $want;
            if ($seekTo < 0 ) {
                $seekTo = 0;
                $want = $fileLen ;
            }
            $self->smartSeek( $seekTo, 0, SEEK_SET)
                or die "Cannot seek to offset $seekTo: $!" ;

            # smartReadExact takes a reference to the target buffer, as
            # at every other call site; the scalar itself was passed
            # here before.
            $self->smartReadExact(\$buffer, $want)
                or die "Cannot read $want bytes at offset $seekTo" ;
            my $pos = rindex( $buffer, Pack_ZIP_END_CENTRAL_HDR_SIG);

            if ($pos >= 0) {
                $here = $seekTo + $pos ;
                $centralDirOffset = unpack("V", substr($buffer, $pos + 16,  4));
                last ;
            }

            return undef
                if $want == $fileLen;
        }
    }

    # An all-ones 32-bit offset means the real value is in the Zip64 record.
    $centralDirOffset = $self->offsetFromZip64($here)
        if U64::full32($centralDirOffset) ;

    return $centralDirOffset ;
}

1;

__END__


=head1 NAME

IO::Uncompress::Unzip - Read zip files/buffers

=head1 SYNOPSIS

    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;

    my $status = unzip $input => $output [,OPTS]
        or die "unzip failed: $UnzipError\n";

    my $z = new IO::Uncompress::Unzip $input [OPTS]
        or die "unzip failed: $UnzipError\n";

    $status = $z->read($buffer)
    $status = $z->read($buffer, $length)
    $status = $z->read($buffer, $length, $offset)
    $line = $z->getline()
    $char = $z->getc()
    $char = $z->ungetc()
    $char = $z->opened()

    $status = $z->inflateSync()

    $data = $z->trailingData()
    $status = $z->nextStream()
    $data = $z->getHeaderInfo()
    $z->tell()
    $z->seek($position, $whence)
    $z->binmode()
    $z->fileno()
    $z->eof()
    $z->close()

    $UnzipError ;

    # IO::File mode

    <$z>
    read($z, $buffer);
    read($z, $buffer, $length);
    read($z, $buffer, $length, $offset);
    tell($z)
    seek($z, $position, $whence)
    binmode($z)
    fileno($z)
    eof($z)
    close($z)

=head1 DESCRIPTION

This module provides a Perl interface that allows the reading of
zip files/buffers.

For writing zip files/buffers, see the companion module IO::Compress::Zip.

=head1 Functional Interface

A top-level function, C<unzip>, is provided to carry out
"one-shot" uncompression between buffers and/or files. For finer
control over the uncompression process, see the L</"OO Interface">
section.

    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;

    unzip $input_filename_or_reference => $output_filename_or_reference [,OPTS]
        or die "unzip failed: $UnzipError\n";

The functional interface needs Perl 5.006 or better.

=head2 unzip $input_filename_or_reference => $output_filename_or_reference [, OPTS]

C<unzip> expects at least two parameters,
C<$input_filename_or_reference> and C<$output_filename_or_reference>
and zero or more optional parameters (see L</Optional Parameters>).

=head3 The C<$input_filename_or_reference> parameter

The parameter, C<$input_filename_or_reference>, is used to define the
source of the compressed data.

It can take one of the following forms:

=over 5

=item A filename

If the C<$input_filename_or_reference> parameter is a simple scalar, it is
assumed to be a filename. This file will be opened for reading and the
input data will be read from it.

=item A filehandle

If the C<$input_filename_or_reference> parameter is a filehandle, the input
data will be read from it. The string '-' can be used as an alias for
standard input.

=item A scalar reference

If C<$input_filename_or_reference> is a scalar reference, the input data
will be read from C<$$input_filename_or_reference>.

=item An array reference

If C<$input_filename_or_reference> is an array reference, each element in
the array must be a filename.

The input data will be read from each file in turn.

The complete array will be walked to ensure that it only
contains valid filenames before any data is uncompressed.

=item An Input FileGlob string

If C<$input_filename_or_reference> is a string that is delimited by the
characters "<" and ">" C<unzip> will assume that it is an
I<input fileglob string>. The input is the list of files that match the
fileglob.

See L<File::GlobMapper|File::GlobMapper> for more details.

=back

If the C<$input_filename_or_reference> parameter is any other type,
C<undef> will be returned.

=head3 The C<$output_filename_or_reference> parameter

The parameter C<$output_filename_or_reference> is used to control the
destination of the uncompressed data. This parameter can take one of
these forms.

=over 5

=item A filename

If the C<$output_filename_or_reference> parameter is a simple scalar, it is
assumed to be a filename. This file will be opened for writing and the
uncompressed data will be written to it.

=item A filehandle

If the C<$output_filename_or_reference> parameter is a filehandle, the
uncompressed data will be written to it. The string '-' can be used as
an alias for standard output.

=item A scalar reference

If C<$output_filename_or_reference> is a scalar reference, the
uncompressed data will be stored in C<$$output_filename_or_reference>.

=item An Array Reference

If C<$output_filename_or_reference> is an array reference,
the uncompressed data will be pushed onto the array.

=item An Output FileGlob

If C<$output_filename_or_reference> is a string that is delimited by the
characters "<" and ">" C<unzip> will assume that it is an
I<output fileglob string>. The output is the list of files that match the
fileglob.

When C<$output_filename_or_reference> is a fileglob string,
C<$input_filename_or_reference> must also be a fileglob string. Anything
else is an error.
1218 1219See L<File::GlobMapper|File::GlobMapper> for more details. 1220 1221=back 1222 1223If the C<$output_filename_or_reference> parameter is any other type, 1224C<undef> will be returned. 1225 1226=head2 Notes 1227 1228When C<$input_filename_or_reference> maps to multiple compressed 1229files/buffers and C<$output_filename_or_reference> is 1230a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a 1231concatenation of all the uncompressed data from each of the input 1232files/buffers. 1233 1234=head2 Optional Parameters 1235 1236The optional parameters for the one-shot function C<unzip> 1237are (for the most part) identical to those used with the OO interface defined in the 1238L</"Constructor Options"> section. The exceptions are listed below 1239 1240=over 5 1241 1242=item C<< AutoClose => 0|1 >> 1243 1244This option applies to any input or output data streams to 1245C<unzip> that are filehandles. 1246 1247If C<AutoClose> is specified, and the value is true, it will result in all 1248input and/or output filehandles being closed once C<unzip> has 1249completed. 1250 1251This parameter defaults to 0. 1252 1253=item C<< BinModeOut => 0|1 >> 1254 1255This option is now a no-op. All files will be written in binmode. 1256 1257=item C<< Append => 0|1 >> 1258 1259The behaviour of this option is dependent on the type of output data 1260stream. 1261 1262=over 5 1263 1264=item * A Buffer 1265 1266If C<Append> is enabled, all uncompressed data will be appended to the end of 1267the output buffer. Otherwise the output buffer will be cleared before any 1268uncompressed data is written to it. 1269 1270=item * A Filename 1271 1272If C<Append> is enabled, the file will be opened in append mode. Otherwise 1273the contents of the file, if any, will be truncated before any uncompressed 1274data is written to it. 
1275 1276=item * A Filehandle 1277 1278If C<Append> is enabled, the filehandle will be positioned to the end of 1279the file via a call to C<seek> before any uncompressed data is 1280written to it. Otherwise the file pointer will not be moved. 1281 1282=back 1283 1284When C<Append> is specified, and set to true, it will I<append> all uncompressed 1285data to the output data stream. 1286 1287So when the output is a filehandle it will carry out a seek to the eof 1288before writing any uncompressed data. If the output is a filename, it will be opened for 1289appending. If the output is a buffer, all uncompressed data will be 1290appended to the existing buffer. 1291 1292Conversely when C<Append> is not specified, or it is present and is set to 1293false, it will operate as follows. 1294 1295When the output is a filename, it will truncate the contents of the file 1296before writing any uncompressed data. If the output is a filehandle 1297its position will not be changed. If the output is a buffer, it will be 1298wiped before any uncompressed data is output. 1299 1300Defaults to 0. 1301 1302=item C<< MultiStream => 0|1 >> 1303 1304If the input file/buffer contains multiple compressed data streams, this 1305option will uncompress the whole lot as a single data stream. 1306 1307Defaults to 0. 1308 1309=item C<< TrailingData => $scalar >> 1310 1311Returns the data, if any, that is present immediately after the compressed 1312data stream once uncompression is complete. 1313 1314This option can be used when there is useful information immediately 1315following the compressed data stream, and you don't know the length of the 1316compressed data stream. 1317 1318If the input is a buffer, C<trailingData> will return everything from the 1319end of the compressed data stream to the end of the buffer. 
1320 1321If the input is a filehandle, C<trailingData> will return the data that is 1322left in the filehandle input buffer once the end of the compressed data 1323stream has been reached. You can then use the filehandle to read the rest 1324of the input file. 1325 1326Don't bother using C<trailingData> if the input is a filename. 1327 1328If you know the length of the compressed data stream before you start 1329uncompressing, you can avoid having to use C<trailingData> by setting the 1330C<InputLength> option. 1331 1332=back 1333 1334=head2 Examples 1335 1336Say you have a zip file, C<file1.zip>, that only contains a 1337single member, you can read it and write the uncompressed data to the 1338file C<file1.txt> like this. 1339 1340 use strict ; 1341 use warnings ; 1342 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1343 1344 my $input = "file1.zip"; 1345 my $output = "file1.txt"; 1346 unzip $input => $output 1347 or die "unzip failed: $UnzipError\n"; 1348 1349If you have a zip file that contains multiple members and want to read a 1350specific member from the file, say C<"data1">, use the C<Name> option 1351 1352 use strict ; 1353 use warnings ; 1354 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1355 1356 my $input = "file1.zip"; 1357 my $output = "file1.txt"; 1358 unzip $input => $output, Name => "data1" 1359 or die "unzip failed: $UnzipError\n"; 1360 1361Alternatively, if you want to read the C<"data1"> member into memory, use 1362a scalar reference for the C<output> parameter. 1363 1364 use strict ; 1365 use warnings ; 1366 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1367 1368 my $input = "file1.zip"; 1369 my $output ; 1370 unzip $input => \$output, Name => "data1" 1371 or die "unzip failed: $UnzipError\n"; 1372 # $output now contains the uncompressed data 1373 1374To read from an existing Perl filehandle, C<$input>, and write the 1375uncompressed data to a buffer, C<$buffer>. 
1376 1377 use strict ; 1378 use warnings ; 1379 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1380 use IO::File ; 1381 1382 my $input = new IO::File "<file1.zip" 1383 or die "Cannot open 'file1.zip': $!\n" ; 1384 my $buffer ; 1385 unzip $input => \$buffer 1386 or die "unzip failed: $UnzipError\n"; 1387 1388=head1 OO Interface 1389 1390=head2 Constructor 1391 1392The format of the constructor for IO::Uncompress::Unzip is shown below 1393 1394 my $z = new IO::Uncompress::Unzip $input [OPTS] 1395 or die "IO::Uncompress::Unzip failed: $UnzipError\n"; 1396 1397Returns an C<IO::Uncompress::Unzip> object on success and undef on failure. 1398The variable C<$UnzipError> will contain an error message on failure. 1399 1400If you are running Perl 5.005 or better the object, C<$z>, returned from 1401IO::Uncompress::Unzip can be used exactly like an L<IO::File|IO::File> filehandle. 1402This means that all normal input file operations can be carried out with 1403C<$z>. For example, to read a line from a compressed file/buffer you can 1404use either of these forms 1405 1406 $line = $z->getline(); 1407 $line = <$z>; 1408 1409The mandatory parameter C<$input> is used to determine the source of the 1410compressed data. This parameter can take one of three forms. 1411 1412=over 5 1413 1414=item A filename 1415 1416If the C<$input> parameter is a scalar, it is assumed to be a filename. This 1417file will be opened for reading and the compressed data will be read from it. 1418 1419=item A filehandle 1420 1421If the C<$input> parameter is a filehandle, the compressed data will be 1422read from it. 1423The string '-' can be used as an alias for standard input. 1424 1425=item A scalar reference 1426 1427If C<$input> is a scalar reference, the compressed data will be read from 1428C<$$input>. 1429 1430=back 1431 1432=head2 Constructor Options 1433 1434The option names defined below are case insensitive and can be optionally 1435prefixed by a '-'. 
So all of the following are valid 1436 1437 -AutoClose 1438 -autoclose 1439 AUTOCLOSE 1440 autoclose 1441 1442OPTS is a combination of the following options: 1443 1444=over 5 1445 1446=item C<< Name => "membername" >> 1447 1448Open "membername" from the zip file for reading. 1449 1450=item C<< Efs => 0|1 >> 1451 1452When this option is set to true AND the zip archive being read has 1453the "Language Encoding Flag" (EFS) set, the member name is assumed to be encoded in UTF-8. 1454 1455If the member name in the zip archive is not valid UTF-8 when this option is true, 1456the script will die with an error message. 1457 1458Note that this option only works with Perl 5.8.4 or better. 1459 1460This option defaults to B<false>. 1461 1462=item C<< AutoClose => 0|1 >> 1463 1464This option is only valid when the C<$input> parameter is a filehandle. If 1465specified, and the value is true, it will result in the file being closed once 1466either the C<close> method is called or the IO::Uncompress::Unzip object is 1467destroyed. 1468 1469This parameter defaults to 0. 1470 1471=item C<< MultiStream => 0|1 >> 1472 1473Treats the complete zip file/buffer as a single compressed data 1474stream. When reading in multi-stream mode each member of the zip 1475file/buffer will be uncompressed in turn until the end of the file/buffer 1476is encountered. 1477 1478This parameter defaults to 0. 1479 1480=item C<< Prime => $string >> 1481 1482This option will uncompress the contents of C<$string> before processing the 1483input file/buffer. 1484 1485This option can be useful when the compressed data is embedded in another 1486file/data structure and it is not possible to work out where the compressed 1487data begins without having to read the first few bytes. If this is the 1488case, the uncompression can be I<primed> with these bytes using this 1489option. 
1490 1491=item C<< Transparent => 0|1 >> 1492 1493If this option is set and the input file/buffer is not compressed data, 1494the module will allow reading of it anyway. 1495 1496In addition, if the input file/buffer does contain compressed data and 1497there is non-compressed data immediately following it, setting this option 1498will make this module treat the whole file/buffer as a single data stream. 1499 1500This option defaults to 1. 1501 1502=item C<< BlockSize => $num >> 1503 1504When reading the compressed input data, IO::Uncompress::Unzip will read it in 1505blocks of C<$num> bytes. 1506 1507This option defaults to 4096. 1508 1509=item C<< InputLength => $size >> 1510 1511When present this option will limit the number of compressed bytes read 1512from the input file/buffer to C<$size>. This option can be used in the 1513situation where there is useful data directly after the compressed data 1514stream and you know beforehand the exact length of the compressed data 1515stream. 1516 1517This option is mostly used when reading from a filehandle, in which case 1518the file pointer will be left pointing to the first byte directly after the 1519compressed data stream. 1520 1521This option defaults to off. 1522 1523=item C<< Append => 0|1 >> 1524 1525This option controls what the C<read> method does with uncompressed data. 1526 1527If set to 1, all uncompressed data will be appended to the output parameter 1528of the C<read> method. 1529 1530If set to 0, the contents of the output parameter of the C<read> method 1531will be overwritten by the uncompressed data. 1532 1533Defaults to 0. 1534 1535=item C<< Strict => 0|1 >> 1536 1537This option controls whether the extra checks defined below are used when 1538carrying out the decompression. When Strict is on, the extra tests are 1539carried out, when Strict is off they are not. 1540 1541The default for this option is off. 
1542 1543=back 1544 1545=head2 Examples 1546 1547TODO 1548 1549=head1 Methods 1550 1551=head2 read 1552 1553Usage is 1554 1555 $status = $z->read($buffer) 1556 1557Reads a block of compressed data (the size of the compressed block is 1558determined by the C<BlockSize> option in the constructor), uncompresses it and 1559writes any uncompressed data into C<$buffer>. If the C<Append> parameter is 1560set in the constructor, the uncompressed data will be appended to the 1561C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. 1562 1563Returns the number of uncompressed bytes written to C<$buffer>, zero if eof 1564or a negative number on error. 1565 1566=head2 read 1567 1568Usage is 1569 1570 $status = $z->read($buffer, $length) 1571 $status = $z->read($buffer, $length, $offset) 1572 1573 $status = read($z, $buffer, $length) 1574 $status = read($z, $buffer, $length, $offset) 1575 1576Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. 1577 1578The main difference between this form of the C<read> method and the 1579previous one, is that this one will attempt to return I<exactly> C<$length> 1580bytes. The only circumstances in which this function will not do so are if end-of-file 1581or an IO error is encountered. 1582 1583Returns the number of uncompressed bytes written to C<$buffer>, zero if eof 1584or a negative number on error. 1585 1586=head2 getline 1587 1588Usage is 1589 1590 $line = $z->getline() 1591 $line = <$z> 1592 1593Reads a single line. 1594 1595This method fully supports the use of the variable C<$/> (or 1596C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to 1597determine what constitutes an end of line. Paragraph mode, record mode and 1598file slurp mode are all supported. 1599 1600=head2 getc 1601 1602Usage is 1603 1604 $char = $z->getc() 1605 1606Read a single character. 
1607 1608=head2 ungetc 1609 1610Usage is 1611 1612 $char = $z->ungetc($string) 1613 1614=head2 inflateSync 1615 1616Usage is 1617 1618 $status = $z->inflateSync() 1619 1620TODO 1621 1622=head2 getHeaderInfo 1623 1624Usage is 1625 1626 $hdr = $z->getHeaderInfo(); 1627 @hdrs = $z->getHeaderInfo(); 1628 1629This method returns either a hash reference (in scalar context) or a list 1630of hash references (in array context) that contains information about each 1631of the header fields in the compressed data stream(s). 1632 1633=head2 tell 1634 1635Usage is 1636 1637 $z->tell() 1638 tell $z 1639 1640Returns the uncompressed file offset. 1641 1642=head2 eof 1643 1644Usage is 1645 1646 $z->eof(); 1647 eof($z); 1648 1649Returns true if the end of the compressed input stream has been reached. 1650 1651=head2 seek 1652 1653 $z->seek($position, $whence); 1654 seek($z, $position, $whence); 1655 1656Provides a sub-set of the C<seek> functionality, with the restriction 1657that it is only legal to seek forward in the input file/buffer. 1658It is a fatal error to attempt to seek backward. 1659 1660Note that the implementation of C<seek> in this module does not provide 1661true random access to a compressed file/buffer. It works by uncompressing 1662data from the current offset in the file/buffer until it reaches the 1663uncompressed offset specified in the parameters to C<seek>. For very small 1664files this may be acceptable behaviour. For large files it may cause an 1665unacceptable delay. 1666 1667The C<$whence> parameter takes one of the usual values, namely SEEK_SET, 1668SEEK_CUR or SEEK_END. 1669 1670Returns 1 on success, 0 on failure. 1671 1672=head2 binmode 1673 1674Usage is 1675 1676 $z->binmode 1677 binmode $z ; 1678 1679This is a noop provided for completeness. 1680 1681=head2 opened 1682 1683 $z->opened() 1684 1685Returns true if the object currently refers to an opened file/buffer. 
1686 1687=head2 autoflush 1688 1689 my $prev = $z->autoflush() 1690 my $prev = $z->autoflush(EXPR) 1691 1692If the C<$z> object is associated with a file or a filehandle, this method 1693returns the current autoflush setting for the underlying filehandle. If 1694C<EXPR> is present, and is non-zero, it will enable flushing after every 1695write/print operation. 1696 1697If C<$z> is associated with a buffer, this method has no effect and always 1698returns C<undef>. 1699 1700B<Note> that the special variable C<$|> B<cannot> be used to set or 1701retrieve the autoflush setting. 1702 1703=head2 input_line_number 1704 1705 $z->input_line_number() 1706 $z->input_line_number(EXPR) 1707 1708Returns the current uncompressed line number. If C<EXPR> is present it has 1709the effect of setting the line number. Note that setting the line number 1710does not change the current position within the file/buffer being read. 1711 1712The contents of C<$/> are used to determine what constitutes a line 1713terminator. 1714 1715=head2 fileno 1716 1717 $z->fileno() 1718 fileno($z) 1719 1720If the C<$z> object is associated with a file or a filehandle, C<fileno> 1721will return the underlying file descriptor. Once the C<close> method is 1722called C<fileno> will return C<undef>. 1723 1724If the C<$z> object is associated with a buffer, this method will return 1725C<undef>. 1726 1727=head2 close 1728 1729 $z->close() ; 1730 close $z ; 1731 1732Closes the output file/buffer. 1733 1734For most versions of Perl this method will be automatically invoked if 1735the IO::Uncompress::Unzip object is destroyed (either explicitly or by the 1736variable with the reference to the object going out of scope). The 1737exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In 1738these cases, the C<close> method will be called automatically, but 1739not until global destruction of all live objects when the program is 1740terminating. 
1741 1742Therefore, if you want your scripts to be able to run on all versions 1743of Perl, you should call C<close> explicitly and not rely on automatic 1744closing. 1745 1746Returns true on success, otherwise 0. 1747 1748If the C<AutoClose> option has been enabled when the IO::Uncompress::Unzip 1749object was created, and the object is associated with a file, the 1750underlying file will also be closed. 1751 1752=head2 nextStream 1753 1754Usage is 1755 1756 my $status = $z->nextStream(); 1757 1758Skips to the next compressed data stream in the input file/buffer. If a new 1759compressed data stream is found, the eof marker will be cleared and C<$.> 1760will be reset to 0. 1761 1762If trailing data is present immediately after the zip archive and the 1763C<Transparent> option is enabled, this method will consider that trailing 1764data to be another member of the zip archive. 1765 1766Returns 1 if a new stream was found, 0 if none was found, and -1 if an 1767error was encountered. 1768 1769=head2 trailingData 1770 1771Usage is 1772 1773 my $data = $z->trailingData(); 1774 1775Returns the data, if any, that is present immediately after the compressed 1776data stream once uncompression is complete. It only makes sense to call 1777this method once the end of the compressed data stream has been 1778encountered. 1779 1780This option can be used when there is useful information immediately 1781following the compressed data stream, and you don't know the length of the 1782compressed data stream. 1783 1784If the input is a buffer, C<trailingData> will return everything from the 1785end of the compressed data stream to the end of the buffer. 1786 1787If the input is a filehandle, C<trailingData> will return the data that is 1788left in the filehandle input buffer once the end of the compressed data 1789stream has been reached. You can then use the filehandle to read the rest 1790of the input file. 1791 1792Don't bother using C<trailingData> if the input is a filename. 
1793 1794If you know the length of the compressed data stream before you start 1795uncompressing, you can avoid having to use C<trailingData> by setting the 1796C<InputLength> option in the constructor. 1797 1798=head1 Importing 1799 1800No symbolic constants are required by IO::Uncompress::Unzip at present. 1801 1802=over 5 1803 1804=item :all 1805 1806Imports C<unzip> and C<$UnzipError>. 1807Same as doing this 1808 1809 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1810 1811=back 1812 1813=head1 EXAMPLES 1814 1815=head2 Working with Net::FTP 1816 1817See L<IO::Compress::FAQ|IO::Compress::FAQ/"Compressed files and Net::FTP"> 1818 1819=head2 Walking through a zip file 1820 1821The code below can be used to traverse a zip file, one compressed data 1822stream at a time. 1823 1824 use IO::Uncompress::Unzip qw($UnzipError); 1825 1826 my $zipfile = "somefile.zip"; 1827 my $u = new IO::Uncompress::Unzip $zipfile 1828 or die "Cannot open $zipfile: $UnzipError"; 1829 1830 my $status; 1831 for ($status = 1; $status > 0; $status = $u->nextStream()) 1832 { 1833 1834 my $name = $u->getHeaderInfo()->{Name}; 1835 warn "Processing member $name\n" ; 1836 1837 my $buff; 1838 while (($status = $u->read($buff)) > 0) { 1839 # Do something here 1840 } 1841 1842 last if $status < 0; 1843 } 1844 1845 die "Error processing $zipfile: $!\n" 1846 if $status < 0 ; 1847 1848Each individual compressed data stream is read until the logical 1849end-of-file is reached. Then C<nextStream> is called. This will skip to the 1850start of the next compressed data stream and clear the end-of-file flag. 1851 1852It is also worth noting that C<nextStream> can be called at any time -- you 1853don't have to wait until you have exhausted a compressed data stream before 1854skipping to the next one. 1855 1856=head2 Unzipping a complete zip file to disk 1857 1858Daniel S. Sterling has written a script that uses C<IO::Uncompress::Unzip> 1859to read a zip file and unzip its contents to disk. 
1860 1861The script is available from L<https://gist.github.com/eqhmcow/5389877> 1862 1863=head1 SUPPORT 1864 1865General feedback/questions/bug reports should be sent to 1866L<https://github.com/pmqs/IO-Compress/issues> (preferred) or 1867L<https://rt.cpan.org/Public/Dist/Display.html?Name=IO-Compress>. 1868 1869=head1 SEE ALSO 1870 1871L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzma>, L<IO::Uncompress::UnLzma>, L<IO::Compress::Xz>, L<IO::Uncompress::UnXz>, L<IO::Compress::Lzip>, L<IO::Uncompress::UnLzip>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Compress::Zstd>, L<IO::Uncompress::UnZstd>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress> 1872 1873L<IO::Compress::FAQ|IO::Compress::FAQ> 1874 1875L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>, 1876L<Archive::Tar|Archive::Tar>, 1877L<IO::Zlib|IO::Zlib> 1878 1879For RFC 1950, 1951 and 1952 see 1880L<http://www.faqs.org/rfcs/rfc1950.html>, 1881L<http://www.faqs.org/rfcs/rfc1951.html> and 1882L<http://www.faqs.org/rfcs/rfc1952.html> 1883 1884The I<zlib> compression library was written by Jean-loup Gailly 1885C<gzip@prep.ai.mit.edu> and Mark Adler C<madler@alumni.caltech.edu>. 1886 1887The primary site for the I<zlib> compression library is 1888L<http://www.zlib.org>. 1889 1890The primary site for gzip is L<http://www.gzip.org>. 1891 1892=head1 AUTHOR 1893 1894This module was written by Paul Marquess, C<pmqs@cpan.org>. 1895 1896=head1 MODIFICATION HISTORY 1897 1898See the Changes file. 1899 1900=head1 COPYRIGHT AND LICENSE 1901 1902Copyright (c) 2005-2019 Paul Marquess. All rights reserved. 1903 1904This program is free software; you can redistribute it and/or 1905modify it under the same terms as Perl itself. 1906 1907