xref: /openbsd-src/gnu/usr.bin/perl/cpan/IO-Compress/lib/IO/Compress/Gzip.pm (revision 3d61058aa5c692477b6d18acfbbdb653a9930ff9)
1package IO::Compress::Gzip ;
2
3require 5.006 ;
4
5use strict ;
6use warnings;
7use bytes;
8
9require Exporter ;
10
11use IO::Compress::RawDeflate 2.212 () ;
12use IO::Compress::Adapter::Deflate 2.212 ;
13
14use IO::Compress::Base::Common  2.212 qw(:Status );
15use IO::Compress::Gzip::Constants 2.212 ;
16use IO::Compress::Zlib::Extra 2.212 ;
17
# Install noUTF8() at compile time: it is utf8::downgrade when this perl
# defines that function, and a do-nothing sub otherwise.
BEGIN
{
    *noUTF8 = defined(&utf8::downgrade)
            ? \&utf8::downgrade
            : sub {} ;
}
25
# Package globals.  %DEFLATE_CONSTANTS is declared here but not assigned in
# this part of the file.
our $VERSION   = '2.212';
our $GzipError = '' ;
our %DEFLATE_CONSTANTS ;

# Inherit the compression engine from RawDeflate and import() from Exporter.
our @ISA       = ('IO::Compress::RawDeflate', 'Exporter');
our @EXPORT_OK = ('$GzipError', 'gzip') ;

# Start from RawDeflate's export tags (a shallow copy of that hash) ...
our %EXPORT_TAGS = %IO::Compress::RawDeflate::DEFLATE_CONSTANTS ;

# ... then add our own symbols to the ':all' tag and make every symbol in
# ':all' exportable on request.
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');
37
# Constructor.  Creates the self-tied object for $class, wired to report
# errors through $GzipError, then hands the caller's remaining arguments to
# _create() and returns whatever _create() returns.
sub new
{
    my $class = shift ;

    my $gz = IO::Compress::Base::Common::createSelfTiedObject($class, \$GzipError);

    # @_ is passed through untouched so the callee sees the caller's own
    # arguments.
    return $gz->_create(undef, @_);
}
46
47
# One-shot functional interface.  Builds an object with no class name,
# wired to $GzipError, and lets _def() do the work with the caller's
# arguments; the result of _def() is returned unchanged.
sub gzip
{
    my $worker = IO::Compress::Base::Common::createSelfTiedObject(undef, \$GzipError);

    return $worker->_def(@_);
}
53
54#sub newHeader
55#{
56#    my $self = shift ;
57#    #return GZIP_MINIMUM_HEADER ;
58#    return $self->mkHeader(*$self->{Got});
59#}
60
# Return the option table for this class as a flat list: the zlib options
# from getZlibParams() first, then one "name => [parse-type, default]" pair
# for each gzip header field.
sub getExtraParams
{
    my $self = shift ;

    # Parse-type codes used in the table below.
    my $boolean  = IO::Compress::Base::Common::Parse_boolean() ;
    my $any      = IO::Compress::Base::Common::Parse_any() ;
    my $unsigned = IO::Compress::Base::Common::Parse_unsigned() ;

    return (
            # zlib behaviour
            $self->getZlibParams(),

            # Gzip header fields
            'minimal'    => [ $boolean,  0 ],
            'comment'    => [ $any,      undef ],
            'name'       => [ $any,      undef ],
            'time'       => [ $any,      undef ],
            'textflag'   => [ $boolean,  0 ],
            'headercrc'  => [ $boolean,  0 ],
            'os_code'    => [ $unsigned, $Compress::Raw::Zlib::gzip_os_code ],
            'extrafield' => [ $any,      undef ],
            'extraflags' => [ $any,      undef ],
        );
}
82
83
# Validate and normalise the gzip-specific options held in $got.
#
# Returns 1 when the options are acceptable.  On a problem it returns the
# result of saveErrorString(undef, ...), which records the message.
sub ckParams
{
    my ($self, $got) = @_ ;

    # gzip always needs crc32
    $got->setValue('crc32' => 1);

    # No header options are checked when merging.
    if ($got->getValue('merge')) {
        return 1 ;
    }

    my $strict = $got->getValue('strict') ;

    # Modification time defaults to now.
    if (! $got->parsed('time')) {
        $got->setValue(time => time) ;
    }

    # Name: in strict mode it may not contain a NUL nor any character
    # matched by $GZIP_FNAME_INVALID_CHAR_RE.
    if ($got->parsed('name') && defined $got->getValue('name')) {
        my $name = $got->getValue('name');

        if ($strict) {
            return $self->saveErrorString(undef, "Null Character found in Name",
                                                Z_DATA_ERROR)
                if $name =~ /\x00/ ;

            return $self->saveErrorString(undef, "Non ISO 8859-1 Character found in Name",
                                                Z_DATA_ERROR)
                if $name =~ /$GZIP_FNAME_INVALID_CHAR_RE/o ;
        }
    }

    # Comment: same two checks, against $GZIP_FCOMMENT_INVALID_CHAR_RE.
    if ($got->parsed('comment') && defined $got->getValue('comment')) {
        my $comment = $got->getValue('comment');

        if ($strict) {
            return $self->saveErrorString(undef, "Null Character found in Comment",
                                                Z_DATA_ERROR)
                if $comment =~ /\x00/ ;

            return $self->saveErrorString(undef, "Non ISO 8859-1 Character found in Comment",
                                                Z_DATA_ERROR)
                if $comment =~ /$GZIP_FCOMMENT_INVALID_CHAR_RE/o;
        }
    }

    # The OS code is written as a single byte, so it must fit in one.
    if ($got->parsed('os_code')) {
        my $value = $got->getValue('os_code');

        if ($value < 0 || $value > 255) {
            return $self->saveErrorString(undef, "OS_Code must be between 0 and 255, got '$value'") ;
        }
    }

    # gzip only supports Deflate at present
    $got->setValue('method' => Z_DEFLATED) ;

    # Unless the caller chose the extra-flags byte, derive it from the
    # compression level: 2 for best compression, 4 for best speed.
    if (! $got->parsed('extraflags')) {
        if ($got->getValue('level') == Z_BEST_COMPRESSION) {
            $got->setValue('extraflags' => 2) ;
        }
        if ($got->getValue('level') == Z_BEST_SPEED) {
            $got->setValue('extraflags' => 4) ;
        }
    }

    # Check the extra field.  $data itself is handed to the parser and then
    # stored back -- presumably because parseExtraField may rewrite its
    # first argument in place; confirm against IO::Compress::Zlib::Extra.
    my $data = $got->getValue('extrafield') ;
    if (defined $data) {
        my $bad = IO::Compress::Zlib::Extra::parseExtraField($data, $strict, 1) ;
        if ($bad) {
            return $self->saveErrorString(undef, "Error with ExtraField Parameter: $bad", Z_DATA_ERROR) ;
        }

        $got->setValue('extrafield' => $data) ;
    }

    return 1;
}
160
# Build the eight-byte gzip trailer: the value of the compressor's crc32()
# followed by the 32-bit value of the uncompressed-size counter, each packed
# as a little-endian 32-bit integer.
sub mkTrailer
{
    my $self = shift ;

    my @fields = (
        *$self->{Compress}->crc32(),
        *$self->{UnCompSize}->get32bit(),
    );

    return pack("V V", @fields);
}
167
# Name the class that reverses this one, together with a reference to that
# class's error variable.
sub getInverseClass
{
    # The error variable is mentioned only here, so silence "used only once".
    no warnings 'once';

    my $errorRef = \$IO::Uncompress::Gunzip::GunzipError ;

    return ('IO::Uncompress::Gunzip', $errorRef);
}
174
# Fill in the 'name' and 'time' options from an input file, for whichever
# of the two the caller did not supply.  Nothing is done when
# IO::Compress::Base::Common::isaScalar() reports true for $filename.
sub getFileInfo
{
    my ($self, $params, $filename) = @_ ;

    return if IO::Compress::Base::Common::isaScalar($filename);

    # Element 9 of stat() is the file's last-modification time.
    my $mtime = (stat($filename))[9] ;

    if (! $params->parsed('name')) {
        $params->setValue('name' => $filename) ;
    }

    if (! $params->parsed('time')) {
        $params->setValue('time' => $mtime) ;
    }
}
191
192
# Build the gzip member header (RFC 1952, section 2.3) from the parsed
# options in $param and return it as a string.
#
# Parameters:
#   $self  - the compression object (not consulted here).
#   $param - parsed-options object, read through getValue(),
#            valueOrDefault() and wantValue().
#
# Returns GZIP_MINIMUM_HEADER when the 'minimal' option is set.  Otherwise
# returns the ten fixed header bytes followed by whichever of the optional
# EXTRA, NAME, COMMENT and header-CRC fields were requested.
sub mkHeader
{
    my $self = shift ;
    my $param = shift ;

    # short-circuit if a minimal header is requested.
    return GZIP_MINIMUM_HEADER if $param->getValue('minimal') ;

    # NAME and COMMENT are zero-terminated in the header, so an embedded NUL
    # would end the field early and the bytes after it would be misread as
    # the following header fields.  Cut the value at the first NUL -- /s lets
    # "." cross newlines, so text with a newline after the NUL is cut too --
    # and then add the single terminator.  After the cut the value cannot
    # already end in a NUL, so the terminator is always needed.
    my $zeroTerminated = sub {
        my $text = shift ;
        $text = '' unless defined $text ;
        $text =~ s/\x00.*\z//s ;
        return $text . GZIP_NULL_BYTE ;
    };

    # METHOD
    my $method = $param->valueOrDefault('method', GZIP_CM_DEFLATED) ;

    # FLAGS
    my $flags       = GZIP_FLG_DEFAULT ;
    $flags |= GZIP_FLG_FTEXT    if $param->getValue('textflag') ;
    $flags |= GZIP_FLG_FHCRC    if $param->getValue('headercrc') ;
    $flags |= GZIP_FLG_FEXTRA   if $param->wantValue('extrafield') ;
    $flags |= GZIP_FLG_FNAME    if $param->wantValue('name') ;
    $flags |= GZIP_FLG_FCOMMENT if $param->wantValue('comment') ;

    # MTIME
    my $time = $param->valueOrDefault('time', GZIP_MTIME_DEFAULT) ;

    # EXTRA FLAGS
    my $extra_flags = $param->valueOrDefault('extraflags', GZIP_XFL_DEFAULT);

    # OS CODE
    my $os_code = $param->valueOrDefault('os_code', GZIP_OS_DEFAULT) ;


    # Four single bytes, a little-endian 32-bit time, then two more bytes.
    my $out = pack("C4 V C C",
            GZIP_ID1,   # ID1
            GZIP_ID2,   # ID2
            $method,    # Compression Method
            $flags,     # Flags
            $time,      # Modification Time
            $extra_flags, # Extra Flags
            $os_code,   # Operating System Code
            ) ;

    # EXTRA: little-endian 16-bit length, then the data.
    if ($flags & GZIP_FLG_FEXTRA) {
        my $extra = $param->getValue('extrafield') ;
        $out .= pack("v", length $extra) . $extra ;
    }

    # NAME
    $out .= $zeroTerminated->($param->getValue('name'))
        if $flags & GZIP_FLG_FNAME ;

    # COMMENT
    $out .= $zeroTerminated->($param->getValue('comment'))
        if $flags & GZIP_FLG_FCOMMENT ;

    # HEADER CRC
    # NOTE(review): RFC 1952 defines CRC16 as the two least-significant
    # bytes of the CRC32 of the header, which would be a 0xFFFF mask; the
    # mask below keeps only the low byte.  Left as it was because the
    # reading side may apply the same mask -- confirm against
    # IO::Uncompress::Gunzip before changing it.
    $out .= pack("v", Compress::Raw::Zlib::crc32($out) & 0x00FF )
        if $param->getValue('headercrc') ;

    noUTF8($out);

    return $out ;
}
268
# gzip writes nothing after the last member's trailer, so the final trailer
# is the empty string.
sub mkFinalTrailer
{
    return q{};
}
273
2741;
275
276__END__
277
278=head1 NAME
279
280IO::Compress::Gzip - Write RFC 1952 files/buffers
281
282=head1 SYNOPSIS
283
284    use IO::Compress::Gzip qw(gzip $GzipError) ;
285
286    my $status = gzip $input => $output [,OPTS]
287        or die "gzip failed: $GzipError\n";
288
289    my $z = IO::Compress::Gzip->new( $output [,OPTS] )
290        or die "gzip failed: $GzipError\n";
291
292    $z->print($string);
293    $z->printf($format, $string);
294    $z->write($string);
295    $z->syswrite($string [, $length, $offset]);
296    $z->flush();
297    $z->tell();
298    $z->eof();
299    $z->seek($position, $whence);
300    $z->binmode();
301    $z->fileno();
302    $z->opened();
303    $z->autoflush();
304    $z->input_line_number();
305    $z->newStream( [OPTS] );
306
307    $z->deflateParams();
308
309    $z->close() ;
310
311    $GzipError ;
312
313    # IO::File mode
314
315    print $z $string;
316    printf $z $format, $string;
317    tell $z
318    eof $z
319    seek $z, $position, $whence
320    binmode $z
321    fileno $z
322    close $z ;
323
324=head1 DESCRIPTION
325
326This module provides a Perl interface that allows writing compressed
327data to files or buffer as defined in RFC 1952.
328
329All the gzip headers defined in RFC 1952 can be created using
330this module.
331
332For reading RFC 1952 files/buffers, see the companion module
333L<IO::Uncompress::Gunzip|IO::Uncompress::Gunzip>.
334
335=head1 Functional Interface
336
337A top-level function, C<gzip>, is provided to carry out
338"one-shot" compression between buffers and/or files. For finer
339control over the compression process, see the L</"OO Interface">
340section.
341
342    use IO::Compress::Gzip qw(gzip $GzipError) ;
343
344    gzip $input_filename_or_reference => $output_filename_or_reference [,OPTS]
345        or die "gzip failed: $GzipError\n";
346
347The functional interface needs Perl5.005 or better.
348
349=head2 gzip $input_filename_or_reference => $output_filename_or_reference [, OPTS]
350
351C<gzip> expects at least two parameters,
352C<$input_filename_or_reference> and C<$output_filename_or_reference>
353and zero or more optional parameters (see L</Optional Parameters>)
354
355=head3 The C<$input_filename_or_reference> parameter
356
357The parameter, C<$input_filename_or_reference>, is used to define the
358source of the uncompressed data.
359
360It can take one of the following forms:
361
362=over 5
363
364=item A filename
365
366If the C<$input_filename_or_reference> parameter is a simple scalar, it is
367assumed to be a filename. This file will be opened for reading and the
368input data will be read from it.
369
370=item A filehandle
371
372If the C<$input_filename_or_reference> parameter is a filehandle, the input
373data will be read from it.  The string '-' can be used as an alias for
374standard input.
375
376=item A scalar reference
377
378If C<$input_filename_or_reference> is a scalar reference, the input data
379will be read from C<$$input_filename_or_reference>.
380
381=item An array reference
382
383If C<$input_filename_or_reference> is an array reference, each element in
384the array must be a filename.
385
386The input data will be read from each file in turn.
387
388The complete array will be walked to ensure that it only
389contains valid filenames before any data is compressed.
390
391=item An Input FileGlob string
392
393If C<$input_filename_or_reference> is a string that is delimited by the
394characters "<" and ">" C<gzip> will assume that it is an
395I<input fileglob string>. The input is the list of files that match the
396fileglob.
397
398See L<File::GlobMapper|File::GlobMapper> for more details.
399
400=back
401
402If the C<$input_filename_or_reference> parameter is any other type,
403C<undef> will be returned.
404
405In addition, if C<$input_filename_or_reference> is a simple filename,
406the default values for
407the C<Name> and C<Time> options will be sourced from that file.
408
409If you do not want to use these defaults they can be overridden by
410explicitly setting the C<Name> and C<Time> options or by setting the
411C<Minimal> parameter.
412
413=head3 The C<$output_filename_or_reference> parameter
414
415The parameter C<$output_filename_or_reference> is used to control the
416destination of the compressed data. This parameter can take one of
417these forms.
418
419=over 5
420
421=item A filename
422
423If the C<$output_filename_or_reference> parameter is a simple scalar, it is
424assumed to be a filename.  This file will be opened for writing and the
425compressed data will be written to it.
426
427=item A filehandle
428
429If the C<$output_filename_or_reference> parameter is a filehandle, the
430compressed data will be written to it.  The string '-' can be used as
431an alias for standard output.
432
433=item A scalar reference
434
435If C<$output_filename_or_reference> is a scalar reference, the
436compressed data will be stored in C<$$output_filename_or_reference>.
437
438=item An Array Reference
439
440If C<$output_filename_or_reference> is an array reference,
441the compressed data will be pushed onto the array.
442
443=item An Output FileGlob
444
445If C<$output_filename_or_reference> is a string that is delimited by the
446characters "<" and ">" C<gzip> will assume that it is an
447I<output fileglob string>. The output is the list of files that match the
448fileglob.
449
When C<$output_filename_or_reference> is a fileglob string,
451C<$input_filename_or_reference> must also be a fileglob string. Anything
452else is an error.
453
454See L<File::GlobMapper|File::GlobMapper> for more details.
455
456=back
457
458If the C<$output_filename_or_reference> parameter is any other type,
459C<undef> will be returned.
460
461=head2 Notes
462
463When C<$input_filename_or_reference> maps to multiple files/buffers and
464C<$output_filename_or_reference> is a single
465file/buffer the input files/buffers will be stored
466in C<$output_filename_or_reference> as a concatenated series of compressed data streams.
467
468=head2 Optional Parameters
469
470The optional parameters for the one-shot function C<gzip>
471are (for the most part) identical to those used with the OO interface defined in the
472L</"Constructor Options"> section. The exceptions are listed below
473
474=over 5
475
476=item C<< AutoClose => 0|1 >>
477
478This option applies to any input or output data streams to
479C<gzip> that are filehandles.
480
481If C<AutoClose> is specified, and the value is true, it will result in all
482input and/or output filehandles being closed once C<gzip> has
483completed.
484
485This parameter defaults to 0.
486
487=item C<< BinModeIn => 0|1 >>
488
489This option is now a no-op. All files will be read in binmode.
490
491=item C<< Append => 0|1 >>
492
493The behaviour of this option is dependent on the type of output data
494stream.
495
496=over 5
497
498=item * A Buffer
499
If C<Append> is enabled, all compressed data will be appended to the end of
501the output buffer. Otherwise the output buffer will be cleared before any
502compressed data is written to it.
503
504=item * A Filename
505
506If C<Append> is enabled, the file will be opened in append mode. Otherwise
507the contents of the file, if any, will be truncated before any compressed
508data is written to it.
509
510=item * A Filehandle
511
512If C<Append> is enabled, the filehandle will be positioned to the end of
513the file via a call to C<seek> before any compressed data is
514written to it.  Otherwise the file pointer will not be moved.
515
516=back
517
518When C<Append> is specified, and set to true, it will I<append> all compressed
519data to the output data stream.
520
521So when the output is a filehandle it will carry out a seek to the eof
522before writing any compressed data. If the output is a filename, it will be opened for
523appending. If the output is a buffer, all compressed data will be
524appended to the existing buffer.
525
526Conversely when C<Append> is not specified, or it is present and is set to
527false, it will operate as follows.
528
529When the output is a filename, it will truncate the contents of the file
530before writing any compressed data. If the output is a filehandle
531its position will not be changed. If the output is a buffer, it will be
532wiped before any compressed data is output.
533
534Defaults to 0.
535
536=back
537
538=head2 Oneshot Examples
539
Here are a few examples that show the capabilities of the module.
541
542=head3 Streaming
543
544This very simple command line example demonstrates the streaming capabilities of the module.
545The code reads data from STDIN, compresses it, and writes the compressed data to STDOUT.
546
547    $ echo hello world | perl -MIO::Compress::Gzip=gzip -e 'gzip \*STDIN => \*STDOUT' >output.gz
548
549The special filename "-" can be used as a standin for both C<\*STDIN> and C<\*STDOUT>,
550so the above can be rewritten as
551
552    $ echo hello world | perl -MIO::Compress::Gzip=gzip -e 'gzip "-" => "-"' >output.gz
553
554=head3 Compressing a file from the filesystem
555
556To read the contents of the file C<file1.txt> and write the compressed
557data to the file C<file1.txt.gz>.
558
559    use strict ;
560    use warnings ;
561    use IO::Compress::Gzip qw(gzip $GzipError) ;
562
563    my $input = "file1.txt";
564    gzip $input => "$input.gz"
565        or die "gzip failed: $GzipError\n";
566
567=head3 Reading from a Filehandle and writing to an in-memory buffer
568
569To read from an existing Perl filehandle, C<$input>, and write the
570compressed data to a buffer, C<$buffer>.
571
572    use strict ;
573    use warnings ;
574    use IO::Compress::Gzip qw(gzip $GzipError) ;
575    use IO::File ;
576
577    my $input = IO::File->new( "<file1.txt" )
578        or die "Cannot open 'file1.txt': $!\n" ;
579    my $buffer ;
580    gzip $input => \$buffer
581        or die "gzip failed: $GzipError\n";
582
583=head3 Compressing multiple files
584
585To compress all files in the directory "/my/home" that match "*.txt"
586and store the compressed data in the same directory
587
588    use strict ;
589    use warnings ;
590    use IO::Compress::Gzip qw(gzip $GzipError) ;
591
592    gzip '</my/home/*.txt>' => '<*.gz>'
593        or die "gzip failed: $GzipError\n";
594
595and if you want to compress each file one at a time, this will do the trick
596
597    use strict ;
598    use warnings ;
599    use IO::Compress::Gzip qw(gzip $GzipError) ;
600
601    for my $input ( glob "/my/home/*.txt" )
602    {
603        my $output = "$input.gz" ;
604        gzip $input => $output
605            or die "Error compressing '$input': $GzipError\n";
606    }
607
608=head1 OO Interface
609
610=head2 Constructor
611
612The format of the constructor for C<IO::Compress::Gzip> is shown below
613
614    my $z = IO::Compress::Gzip->new( $output [,OPTS] )
615        or die "IO::Compress::Gzip failed: $GzipError\n";
616
617The constructor takes one mandatory parameter, C<$output>, defined below and
zero or more C<OPTS>, defined in L</"Constructor Options">.
619
620It returns an C<IO::Compress::Gzip> object on success and C<undef> on failure.
621The variable C<$GzipError> will contain an error message on failure.
622
623If you are running Perl 5.005 or better the object, C<$z>, returned from
624IO::Compress::Gzip can be used exactly like an L<IO::File|IO::File> filehandle.
625This means that all normal output file operations can be carried out
626with C<$z>.
627For example, to write to a compressed file/buffer you can use either of
628these forms
629
630    $z->print("hello world\n");
631    print $z "hello world\n";
632
Below is a simple example of using the OO interface to create an output file
634C<myfile.gz> and write some data to it.
635
636    my $filename = "myfile.gz";
637    my $z = IO::Compress::Gzip->new($filename)
638        or die "IO::Compress::Gzip failed: $GzipError\n";
639
640    $z->print("abcde");
641    $z->close();
642
643See the L</Examples> for more.
644
645The mandatory parameter C<$output> is used to control the destination
646of the compressed data. This parameter can take one of these forms.
647
648=over 5
649
650=item A filename
651
652If the C<$output> parameter is a simple scalar, it is assumed to be a
653filename. This file will be opened for writing and the compressed data
654will be written to it.
655
656=item A filehandle
657
658If the C<$output> parameter is a filehandle, the compressed data will be
659written to it.
660The string '-' can be used as an alias for standard output.
661
662=item A scalar reference
663
664If C<$output> is a scalar reference, the compressed data will be stored
665in C<$$output>.
666
667=back
668
669If the C<$output> parameter is any other type, C<IO::Compress::Gzip>::new will
670return undef.
671
672=head2 Constructor Options
673
C<OPTS> is any combination of zero or more of the following options:
675
676=over 5
677
678=item C<< AutoClose => 0|1 >>
679
680This option is only valid when the C<$output> parameter is a filehandle. If
681specified, and the value is true, it will result in the C<$output> being
682closed once either the C<close> method is called or the C<IO::Compress::Gzip>
683object is destroyed.
684
685This parameter defaults to 0.
686
687=item C<< Append => 0|1 >>
688
689Opens C<$output> in append mode.
690
691The behaviour of this option is dependent on the type of C<$output>.
692
693=over 5
694
695=item * A Buffer
696
697If C<$output> is a buffer and C<Append> is enabled, all compressed data
will be appended to the end of C<$output>. Otherwise C<$output> will be
699cleared before any data is written to it.
700
701=item * A Filename
702
703If C<$output> is a filename and C<Append> is enabled, the file will be
704opened in append mode. Otherwise the contents of the file, if any, will be
705truncated before any compressed data is written to it.
706
707=item * A Filehandle
708
709If C<$output> is a filehandle, the file pointer will be positioned to the
710end of the file via a call to C<seek> before any compressed data is written
711to it.  Otherwise the file pointer will not be moved.
712
713=back
714
715This parameter defaults to 0.
716
717=item C<< Merge => 0|1 >>
718
719This option is used to compress input data and append it to an existing
720compressed data stream in C<$output>. The end result is a single compressed
721data stream stored in C<$output>.
722
723It is a fatal error to attempt to use this option when C<$output> is not an
724RFC 1952 data stream.
725
726There are a number of other limitations with the C<Merge> option:
727
728=over 5
729
730=item 1
731
732This module needs to have been built with zlib 1.2.1 or better to work. A
733fatal error will be thrown if C<Merge> is used with an older version of
734zlib.
735
736=item 2
737
738If C<$output> is a file or a filehandle, it must be seekable.
739
740=back
741
742This parameter defaults to 0.
743
744=item -Level
745
746Defines the compression level used by zlib. The value should either be
747a number between 0 and 9 (0 means no compression and 9 is maximum
748compression), or one of the symbolic constants defined below.
749
750   Z_NO_COMPRESSION
751   Z_BEST_SPEED
752   Z_BEST_COMPRESSION
753   Z_DEFAULT_COMPRESSION
754
755The default is Z_DEFAULT_COMPRESSION.
756
757Note, these constants are not imported by C<IO::Compress::Gzip> by default.
758
759    use IO::Compress::Gzip qw(:strategy);
760    use IO::Compress::Gzip qw(:constants);
761    use IO::Compress::Gzip qw(:all);
762
763=item -Strategy
764
765Defines the strategy used to tune the compression. Use one of the symbolic
766constants defined below.
767
768   Z_FILTERED
769   Z_HUFFMAN_ONLY
770   Z_RLE
771   Z_FIXED
772   Z_DEFAULT_STRATEGY
773
774The default is Z_DEFAULT_STRATEGY.
775
776=item C<< Minimal => 0|1 >>
777
778If specified, this option will force the creation of the smallest possible
779compliant gzip header (which is exactly 10 bytes long) as defined in
780RFC 1952.
781
782See the section titled "Compliance" in RFC 1952 for a definition
783of the values used for the fields in the gzip header.
784
785All other parameters that control the content of the gzip header will
786be ignored if this parameter is set to 1.
787
788This parameter defaults to 0.
789
790=item C<< Comment => $comment >>
791
792Stores the contents of C<$comment> in the COMMENT field in
793the gzip header.
794By default, no comment field is written to the gzip file.
795
796If the C<-Strict> option is enabled, the comment can only consist of ISO
7978859-1 characters plus line feed.
798
799If the C<-Strict> option is disabled, the comment field can contain any
800character except NULL. If any null characters are present, the field
801will be truncated at the first NULL.
802
803=item C<< Name => $string >>
804
805Stores the contents of C<$string> in the gzip NAME header field. If
806C<Name> is not specified, no gzip NAME field will be created.
807
808If the C<-Strict> option is enabled, C<$string> can only consist of ISO
8098859-1 characters.
810
811If C<-Strict> is disabled, then C<$string> can contain any character
812except NULL. If any null characters are present, the field will be
813truncated at the first NULL.
814
815=item C<< Time => $number >>
816
817Sets the MTIME field in the gzip header to $number.
818
819This field defaults to the time the C<IO::Compress::Gzip> object was created
820if this option is not specified.
821
822=item C<< TextFlag => 0|1 >>
823
824This parameter controls the setting of the FLG.FTEXT bit in the gzip
825header. It is used to signal that the data stored in the gzip file/buffer
826is probably text.
827
828The default is 0.
829
830=item C<< HeaderCRC => 0|1 >>
831
832When true this parameter will set the FLG.FHCRC bit to 1 in the gzip header
833and set the CRC16 header field to the CRC of the complete gzip header
834except the CRC16 field itself.
835
836B<Note> that gzip files created with the C<HeaderCRC> flag set to 1 cannot
837be read by most, if not all, of the standard gunzip utilities, most
838notably gzip version 1.2.4. You should therefore avoid using this option if
839you want to maximize the portability of your gzip files.
840
841This parameter defaults to 0.
842
843=item C<< OS_Code => $value >>
844
845Stores C<$value> in the gzip OS header field. A number between 0 and 255 is
846valid.
847
848If not specified, this parameter defaults to the OS code of the Operating
849System this module was built on. The value 3 is used as a catch-all for all
850Unix variants and unknown Operating Systems.
851
852=item C<< ExtraField => $data >>
853
854This parameter allows additional metadata to be stored in the ExtraField in
855the gzip header. An RFC 1952 compliant ExtraField consists of zero or more
856subfields. Each subfield consists of a two byte header followed by the
857subfield data.
858
859The list of subfields can be supplied in any of the following formats
860
861    -ExtraField => [$id1, $data1,
862                    $id2, $data2,
863                     ...
864                   ]
865    -ExtraField => [ [$id1 => $data1],
866                     [$id2 => $data2],
867                     ...
868                   ]
869    -ExtraField => { $id1 => $data1,
870                     $id2 => $data2,
871                     ...
872                   }
873
874Where C<$id1>, C<$id2> are two byte subfield ID's. The second byte of
875the ID cannot be 0, unless the C<Strict> option has been disabled.
876
877If you use the hash syntax, you have no control over the order in which
878the ExtraSubFields are stored, plus you cannot have SubFields with
879duplicate ID.
880
Alternatively the list of subfields can be supplied as a scalar, thus
882
883    -ExtraField => $rawdata
884
885If you use the raw format, and the C<Strict> option is enabled,
886C<IO::Compress::Gzip> will check that C<$rawdata> consists of zero or more
887conformant sub-fields. When C<Strict> is disabled, C<$rawdata> can
888consist of any arbitrary byte stream.
889
The maximum size of the Extra Field is 65535 bytes.
891
892=item C<< ExtraFlags => $value >>
893
894Sets the XFL byte in the gzip header to C<$value>.
895
896If this option is not present, the value stored in XFL field will be
897determined by the setting of the C<Level> option.
898
If C<< Level => Z_BEST_COMPRESSION >> has been specified then XFL is set to 2.
If C<< Level => Z_BEST_SPEED >> has been specified then XFL is set to 4.
901Otherwise XFL is set to 0.
902
903=item C<< Strict => 0|1 >>
904
905C<Strict> will optionally police the values supplied with other options
906to ensure they are compliant with RFC1952.
907
908This option is enabled by default.
909
910If C<Strict> is enabled the following behaviour will be policed:
911
912=over 5
913
914=item *
915
916The value supplied with the C<Name> option can only contain ISO 8859-1
917characters.
918
919=item *
920
921The value supplied with the C<Comment> option can only contain ISO 8859-1
922characters plus line-feed.
923
924=item *
925
The values supplied with the C<-Name> and C<-Comment> options cannot
contain any embedded nulls.
928
929=item *
930
931If an C<ExtraField> option is specified and it is a simple scalar,
932it must conform to the sub-field structure as defined in RFC 1952.
933
934=item *
935
936If an C<ExtraField> option is specified the second byte of the ID will be
937checked in each subfield to ensure that it does not contain the reserved
938value 0x00.
939
940=back
941
942When C<Strict> is disabled the following behaviour will be policed:
943
944=over 5
945
946=item *
947
948The value supplied with C<-Name> option can contain
949any character except NULL.
950
951=item *
952
953The value supplied with C<-Comment> option can contain any character
954except NULL.
955
956=item *
957
958The values supplied with the C<-Name> and C<-Comment> options can contain
959multiple embedded nulls. The string written to the gzip header will
960consist of the characters up to, but not including, the first embedded
961NULL.
962
963=item *
964
965If an C<ExtraField> option is specified and it is a simple scalar, the
966structure will not be checked. The only error is if the length is too big.
967
968=item *
969
970The ID header in an C<ExtraField> sub-field can consist of any two bytes.
971
972=back
973
974=back
975
976=head2 Examples
977
978=head3 Streaming
979
980This very simple command line example demonstrates the streaming capabilities
981of the module. The code reads data from STDIN or all the files given on the
982commandline, compresses it, and writes the compressed data to STDOUT.
983
984    use strict ;
985    use warnings ;
986    use IO::Compress::Gzip qw(gzip $GzipError) ;
987
988    my $z = IO::Compress::Gzip->new("-", Stream => 1)
989        or die "IO::Compress::Gzip failed: $GzipError\n";
990
991    while (<>) {
        $z->print($_);
993    }
994    $z->close();
995
Note the use of C<"-"> to mean C<STDOUT>. Alternatively you can use C<\*STDOUT>.
997
998=head3 Compressing a file from the filesystem
999
1000To read the contents of the file C<file1.txt> and write the compressed
1001data to the file C<file1.txt.gz> there are a few options
1002
1003Start by creating the compression object and opening the input file
1004
1005    use strict ;
1006    use warnings ;
1007    use IO::Compress::Gzip qw(gzip $GzipError) ;
1008
1009    my $input = "file1.txt";
1010    my $z = IO::Compress::Gzip->new("file1.txt.gz")
1011        or die "IO::Compress::Gzip failed: $GzipError\n";
1012
1013    # open the input file
1014    open my $fh, "<", "file1.txt"
1015        or die "Cannot open file1.txt: $!\n";
1016
1017    # loop through the input file & write to the compressed file
1018    while (<$fh>) {
1019        $z->print($_);
1020    }
1021
1022    # not forgetting to close the compressed file
1023    $z->close();
1024
1025=head1 Methods
1026
1027=head2 print
1028
1029Usage is
1030
1031    $z->print($data)
1032    print $z $data
1033
1034Compresses and outputs the contents of the C<$data> parameter. This
1035has the same behaviour as the C<print> built-in.
1036
1037Returns true if successful.
1038
1039=head2 printf
1040
1041Usage is
1042
1043    $z->printf($format, $data)
1044    printf $z $format, $data
1045
1046Compresses and outputs the contents of the C<$data> parameter.
1047
1048Returns true if successful.
1049
1050=head2 syswrite
1051
1052Usage is
1053
1054    $z->syswrite($data)
1055    $z->syswrite($data, $length)
1056    $z->syswrite($data, $length, $offset)
1057
1058Compresses and outputs the contents of the C<$data> parameter.
1059
1060Returns the number of uncompressed bytes written, or C<undef> if
1061unsuccessful.
1062
1063=head2 write
1064
1065Usage is
1066
1067    $z->write($data)
1068    $z->write($data, $length)
1069    $z->write($data, $length, $offset)
1070
1071Compresses and outputs the contents of the C<$data> parameter.
1072
1073Returns the number of uncompressed bytes written, or C<undef> if
1074unsuccessful.
1075
1076=head2 flush
1077
1078Usage is
1079
1080    $z->flush;
1081    $z->flush($flush_type);
1082
1083Flushes any pending compressed data to the output file/buffer.
1084
1085This method takes an optional parameter, C<$flush_type>, that controls
1086how the flushing will be carried out. By default the C<$flush_type>
1087used is C<Z_FINISH>. Other valid values for C<$flush_type> are
1088C<Z_NO_FLUSH>, C<Z_SYNC_FLUSH>, C<Z_FULL_FLUSH> and C<Z_BLOCK>. It is
1089strongly recommended that you only set the C<$flush_type> parameter if
1090you fully understand the implications of what it does - overuse of C<flush>
1091can seriously degrade the level of compression achieved. See the C<zlib>
1092documentation for details.
1093
1094Returns true on success.
1095
1096=head2 tell
1097
1098Usage is
1099
1100    $z->tell()
1101    tell $z
1102
1103Returns the uncompressed file offset.
1104
1105=head2 eof
1106
1107Usage is
1108
1109    $z->eof();
1110    eof($z);
1111
1112Returns true if the C<close> method has been called.
1113
1114=head2 seek
1115
1116    $z->seek($position, $whence);
1117    seek($z, $position, $whence);
1118
1119Provides a sub-set of the C<seek> functionality, with the restriction
1120that it is only legal to seek forward in the output file/buffer.
1121It is a fatal error to attempt to seek backward.
1122
1123Empty parts of the file/buffer will have NULL (0x00) bytes written to them.
1124
1125The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
1126SEEK_CUR or SEEK_END.
1127
1128Returns 1 on success, 0 on failure.
1129
1130=head2 binmode
1131
1132Usage is
1133
1134    $z->binmode
1135    binmode $z ;
1136
1137This is a no-op provided for completeness.
1138
1139=head2 opened
1140
1141    $z->opened()
1142
1143Returns true if the object currently refers to an opened file/buffer.
1144
1145=head2 autoflush
1146
1147    my $prev = $z->autoflush()
1148    my $prev = $z->autoflush(EXPR)
1149
1150If the C<$z> object is associated with a file or a filehandle, this method
1151returns the current autoflush setting for the underlying filehandle. If
1152C<EXPR> is present, and is non-zero, it will enable flushing after every
1153write/print operation.
1154
1155If C<$z> is associated with a buffer, this method has no effect and always
1156returns C<undef>.
1157
1158B<Note> that the special variable C<$|> B<cannot> be used to set or
1159retrieve the autoflush setting.
1160
1161=head2 input_line_number
1162
1163    $z->input_line_number()
1164    $z->input_line_number(EXPR)
1165
1166This method always returns C<undef> when compressing.
1167
1168=head2 fileno
1169
1170    $z->fileno()
1171    fileno($z)
1172
1173If the C<$z> object is associated with a file or a filehandle, C<fileno>
1174will return the underlying file descriptor. Once the C<close> method is
1175called C<fileno> will return C<undef>.
1176
1177If the C<$z> object is associated with a buffer, this method will return
1178C<undef>.
1179
1180=head2 close
1181
1182    $z->close() ;
1183    close $z ;
1184
1185Flushes any pending compressed data and then closes the output file/buffer.
1186
1187For most versions of Perl this method will be automatically invoked if
1188the IO::Compress::Gzip object is destroyed (either explicitly or by the
1189variable with the reference to the object going out of scope). The
1190exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
1191these cases, the C<close> method will be called automatically, but
1192not until global destruction of all live objects when the program is
1193terminating.
1194
1195Therefore, if you want your scripts to be able to run on all versions
1196of Perl, you should call C<close> explicitly and not rely on automatic
1197closing.
1198
1199Returns true on success, otherwise 0.
1200
1201If the C<AutoClose> option has been enabled when the IO::Compress::Gzip
1202object was created, and the object is associated with a file, the
1203underlying file will also be closed.
1204
1205=head2 newStream([OPTS])
1206
1207Usage is
1208
1209    $z->newStream( [OPTS] )
1210
1211Closes the current compressed data stream and starts a new one.
1212
1213OPTS consists of any of the options that are available when creating
1214the C<$z> object.
1215
1216See the L</"Constructor Options"> section for more details.
1217
1218=head2 deflateParams
1219
1220Usage is
1221
1222    $z->deflateParams
1223
1224TODO
1225
1226=head1 Importing
1227
1228A number of symbolic constants are required by some methods in
1229C<IO::Compress::Gzip>. None are imported by default.
1230
1231=over 5
1232
1233=item :all
1234
1235Imports C<gzip>, C<$GzipError> and all symbolic
1236constants that can be used by C<IO::Compress::Gzip>. Same as doing this
1237
1238    use IO::Compress::Gzip qw(gzip $GzipError :constants) ;
1239
1240=item :constants
1241
1242Import all symbolic constants. Same as doing this
1243
1244    use IO::Compress::Gzip qw(:flush :level :strategy) ;
1245
1246=item :flush
1247
1248These symbolic constants are used by the C<flush> method.
1249
1250    Z_NO_FLUSH
1251    Z_PARTIAL_FLUSH
1252    Z_SYNC_FLUSH
1253    Z_FULL_FLUSH
1254    Z_FINISH
1255    Z_BLOCK
1256
1257=item :level
1258
1259These symbolic constants are used by the C<Level> option in the constructor.
1260
1261    Z_NO_COMPRESSION
1262    Z_BEST_SPEED
1263    Z_BEST_COMPRESSION
1264    Z_DEFAULT_COMPRESSION
1265
1266=item :strategy
1267
1268These symbolic constants are used by the C<Strategy> option in the constructor.
1269
1270    Z_FILTERED
1271    Z_HUFFMAN_ONLY
1272    Z_RLE
1273    Z_FIXED
1274    Z_DEFAULT_STRATEGY
1275
1276=back
1277
1278=head1 EXAMPLES
1279
1280=head2 Apache::GZip Revisited
1281
1282See L<IO::Compress::FAQ|IO::Compress::FAQ/"Apache::GZip Revisited">
1283
1284=head2 Working with Net::FTP
1285
1286See L<IO::Compress::FAQ|IO::Compress::FAQ/"Compressed files and Net::FTP">
1287
1288=head1 SUPPORT
1289
1290General feedback/questions/bug reports should be sent to
1291L<https://github.com/pmqs/IO-Compress/issues> (preferred) or
1292L<https://rt.cpan.org/Public/Dist/Display.html?Name=IO-Compress>.
1293
1294=head1 SEE ALSO
1295
1296L<Compress::Zlib>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzma>, L<IO::Uncompress::UnLzma>, L<IO::Compress::Xz>, L<IO::Uncompress::UnXz>, L<IO::Compress::Lzip>, L<IO::Uncompress::UnLzip>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Compress::Zstd>, L<IO::Uncompress::UnZstd>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
1297
1298L<IO::Compress::FAQ|IO::Compress::FAQ>
1299
1300L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1301L<Archive::Tar|Archive::Tar>,
1302L<IO::Zlib|IO::Zlib>
1303
1304For RFC 1950, 1951 and 1952 see
1305L<https://datatracker.ietf.org/doc/html/rfc1950>,
1306L<https://datatracker.ietf.org/doc/html/rfc1951> and
1307L<https://datatracker.ietf.org/doc/html/rfc1952>
1308
1309The I<zlib> compression library was written by Jean-loup Gailly
1310C<gzip@prep.ai.mit.edu> and Mark Adler C<madler@alumni.caltech.edu>.
1311
1312The primary site for the I<zlib> compression library is
1313L<http://www.zlib.org>.
1314
1315The primary site for the I<zlib-ng> compression library is
1316L<https://github.com/zlib-ng/zlib-ng>.
1317
1318The primary site for gzip is L<http://www.gzip.org>.
1319
1320=head1 AUTHOR
1321
1322This module was written by Paul Marquess, C<pmqs@cpan.org>.
1323
1324=head1 MODIFICATION HISTORY
1325
1326See the Changes file.
1327
1328=head1 COPYRIGHT AND LICENSE
1329
1330Copyright (c) 2005-2024 Paul Marquess. All rights reserved.
1331
1332This program is free software; you can redistribute it and/or
1333modify it under the same terms as Perl itself.
1334