xref: /minix3/common/dist/zlib/examples/zlib_how.html (revision 44bedb31d842b4b0444105519bcf929a69fe2dc1)
1*44bedb31SLionel Sambuc<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
2*44bedb31SLionel Sambuc  "http://www.w3.org/TR/REC-html40/loose.dtd">
3*44bedb31SLionel Sambuc<html>
4*44bedb31SLionel Sambuc<head>
5*44bedb31SLionel Sambuc<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
6*44bedb31SLionel Sambuc<title>zlib Usage Example</title>
7*44bedb31SLionel Sambuc<!--  Copyright (c) 2004 Mark Adler.  -->
8*44bedb31SLionel Sambuc</head>
9*44bedb31SLionel Sambuc<body bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#00A000">
10*44bedb31SLionel Sambuc<h2 align="center"> zlib Usage Example </h2>
11*44bedb31SLionel SambucWe often get questions about how the <tt>deflate()</tt> and <tt>inflate()</tt> functions should be used.
12*44bedb31SLionel SambucUsers wonder when they should provide more input, when they should use more output,
13*44bedb31SLionel Sambucwhat to do with a <tt>Z_BUF_ERROR</tt>, how to make sure the process terminates properly, and
14*44bedb31SLionel Sambucso on.  So for those who have read <tt>zlib.h</tt> (a few times), and
15*44bedb31SLionel Sambucwould like further edification, below is an annotated example in C of simple routines to compress and decompress
16*44bedb31SLionel Sambucfrom an input file to an output file using <tt>deflate()</tt> and <tt>inflate()</tt> respectively.  The
17*44bedb31SLionel Sambucannotations are interspersed between lines of the code.  So please read between the lines.
18*44bedb31SLionel SambucWe hope this helps explain some of the intricacies of <em>zlib</em>.
19*44bedb31SLionel Sambuc<p>
20*44bedb31SLionel SambucWithout further ado, here is the program <a href="zpipe.c"><tt>zpipe.c</tt></a>:
21*44bedb31SLionel Sambuc<pre><b>
22*44bedb31SLionel Sambuc/* zpipe.c: example of proper use of zlib's inflate() and deflate()
23*44bedb31SLionel Sambuc   Not copyrighted -- provided to the public domain
24*44bedb31SLionel Sambuc   Version 1.2  9 November 2004  Mark Adler */
25*44bedb31SLionel Sambuc
26*44bedb31SLionel Sambuc/* Version history:
27*44bedb31SLionel Sambuc   1.0  30 Oct 2004  First version
28*44bedb31SLionel Sambuc   1.1   8 Nov 2004  Add void casting for unused return values
29*44bedb31SLionel Sambuc                     Use switch statement for inflate() return values
30*44bedb31SLionel Sambuc   1.2   9 Nov 2004  Add assertions to document zlib guarantees
31*44bedb31SLionel Sambuc */
32*44bedb31SLionel Sambuc</b></pre><!-- -->
33*44bedb31SLionel SambucWe now include the header files for the required definitions.  From
34*44bedb31SLionel Sambuc<tt>stdio.h</tt> we use <tt>fopen()</tt>, <tt>fread()</tt>, <tt>fwrite()</tt>,
35*44bedb31SLionel Sambuc<tt>feof()</tt>, <tt>ferror()</tt>, and <tt>fclose()</tt> for file i/o, and
36*44bedb31SLionel Sambuc<tt>fputs()</tt> for error messages.  From <tt>string.h</tt> we use
37*44bedb31SLionel Sambuc<tt>strcmp()</tt> for command line argument processing.
38*44bedb31SLionel SambucFrom <tt>assert.h</tt> we use the <tt>assert()</tt> macro.
39*44bedb31SLionel SambucFrom <tt>zlib.h</tt>
40*44bedb31SLionel Sambucwe use the basic compression functions <tt>deflateInit()</tt>,
41*44bedb31SLionel Sambuc<tt>deflate()</tt>, and <tt>deflateEnd()</tt>, and the basic decompression
42*44bedb31SLionel Sambucfunctions <tt>inflateInit()</tt>, <tt>inflate()</tt>, and
43*44bedb31SLionel Sambuc<tt>inflateEnd()</tt>.
44*44bedb31SLionel Sambuc<pre><b>
45*44bedb31SLionel Sambuc#include &lt;stdio.h&gt;
46*44bedb31SLionel Sambuc#include &lt;string.h&gt;
47*44bedb31SLionel Sambuc#include &lt;assert.h&gt;
48*44bedb31SLionel Sambuc#include "zlib.h"
49*44bedb31SLionel Sambuc</b></pre><!-- -->
50*44bedb31SLionel Sambuc<tt>CHUNK</tt> is simply the buffer size for feeding data to and pulling data
51*44bedb31SLionel Sambucfrom the <em>zlib</em> routines.  Larger buffer sizes would be more efficient,
52*44bedb31SLionel Sambucespecially for <tt>inflate()</tt>.  If the memory is available, buffer sizes
53*44bedb31SLionel Sambucon the order of 128K or 256K bytes should be used.
54*44bedb31SLionel Sambuc<pre><b>
55*44bedb31SLionel Sambuc#define CHUNK 16384
56*44bedb31SLionel Sambuc</b></pre><!-- -->
57*44bedb31SLionel SambucThe <tt>def()</tt> routine compresses data from an input file to an output file.  The output data
58*44bedb31SLionel Sambucwill be in the <em>zlib</em> format, which is different from the <em>gzip</em> or <em>zip</em>
59*44bedb31SLionel Sambucformats.  The <em>zlib</em> format has a very small header of only two bytes to identify it as
60*44bedb31SLionel Sambuca <em>zlib</em> stream and to provide decoding information, and a four-byte trailer with a fast
61*44bedb31SLionel Sambuccheck value to verify the integrity of the uncompressed data after decoding.
62*44bedb31SLionel Sambuc<pre><b>
63*44bedb31SLionel Sambuc/* Compress from file source to file dest until EOF on source.
64*44bedb31SLionel Sambuc   def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
65*44bedb31SLionel Sambuc   allocated for processing, Z_STREAM_ERROR if an invalid compression
66*44bedb31SLionel Sambuc   level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
67*44bedb31SLionel Sambuc   version of the library linked do not match, or Z_ERRNO if there is
68*44bedb31SLionel Sambuc   an error reading or writing the files. */
69*44bedb31SLionel Sambucint def(FILE *source, FILE *dest, int level)
70*44bedb31SLionel Sambuc{
71*44bedb31SLionel Sambuc</b></pre>
72*44bedb31SLionel SambucHere are the local variables for <tt>def()</tt>.  <tt>ret</tt> will be used for <em>zlib</em>
73*44bedb31SLionel Sambucreturn codes.  <tt>flush</tt> will keep track of the current flushing state for <tt>deflate()</tt>,
74*44bedb31SLionel Sambucwhich is either no flushing, or flush to completion after the end of the input file is reached.
75*44bedb31SLionel Sambuc<tt>have</tt> is the amount of data returned from <tt>deflate()</tt>.  The <tt>strm</tt> structure
76*44bedb31SLionel Sambucis used to pass information to and from the <em>zlib</em> routines, and to maintain the
77*44bedb31SLionel Sambuc<tt>deflate()</tt> state.  <tt>in</tt> and <tt>out</tt> are the input and output buffers for
78*44bedb31SLionel Sambuc<tt>deflate()</tt>.
79*44bedb31SLionel Sambuc<pre><b>
80*44bedb31SLionel Sambuc    int ret, flush;
81*44bedb31SLionel Sambuc    unsigned have;
82*44bedb31SLionel Sambuc    z_stream strm;
83*44bedb31SLionel Sambuc    char in[CHUNK];
84*44bedb31SLionel Sambuc    char out[CHUNK];
85*44bedb31SLionel Sambuc</b></pre><!-- -->
86*44bedb31SLionel SambucThe first thing we do is to initialize the <em>zlib</em> state for compression using
87*44bedb31SLionel Sambuc<tt>deflateInit()</tt>.  This must be done before the first use of <tt>deflate()</tt>.
88*44bedb31SLionel SambucThe <tt>zalloc</tt>, <tt>zfree</tt>, and <tt>opaque</tt> fields in the <tt>strm</tt>
89*44bedb31SLionel Sambucstructure must be initialized before calling <tt>deflateInit()</tt>.  Here they are
90*44bedb31SLionel Sambucset to the <em>zlib</em> constant <tt>Z_NULL</tt> to request that <em>zlib</em> use
91*44bedb31SLionel Sambucthe default memory allocation routines.  An application may also choose to provide
92*44bedb31SLionel Sambuccustom memory allocation routines here.  <tt>deflateInit()</tt> will allocate on the
93*44bedb31SLionel Sambucorder of 256K bytes for the internal state.
94*44bedb31SLionel Sambuc(See <a href="zlib_tech.html"><em>zlib Technical Details</em></a>.)
95*44bedb31SLionel Sambuc<p>
96*44bedb31SLionel Sambuc<tt>deflateInit()</tt> is called with a pointer to the structure to be initialized and
97*44bedb31SLionel Sambucthe compression level, which is an integer in the range of -1 to 9.  Lower compression
98*44bedb31SLionel Sambuclevels result in faster execution, but less compression.  Higher levels result in
99*44bedb31SLionel Sambucgreater compression, but slower execution.  The <em>zlib</em> constant Z_DEFAULT_COMPRESSION,
100*44bedb31SLionel Sambucequal to -1,
101*44bedb31SLionel Sambucprovides a good compromise between compression and speed and is equivalent to level 6.
102*44bedb31SLionel SambucLevel 0 actually does no compression at all, and in fact expands the data slightly to produce
103*44bedb31SLionel Sambucthe <em>zlib</em> format (it is not a byte-for-byte copy of the input).
104*44bedb31SLionel SambucMore advanced applications of <em>zlib</em>
105*44bedb31SLionel Sambucmay use <tt>deflateInit2()</tt> here instead.  Such an application may want to reduce how
106*44bedb31SLionel Sambucmuch memory will be used, at some price in compression.  Or it may need to request a
107*44bedb31SLionel Sambuc<em>gzip</em> header and trailer instead of a <em>zlib</em> header and trailer, or raw
108*44bedb31SLionel Sambucencoding with no header or trailer at all.
109*44bedb31SLionel Sambuc<p>
110*44bedb31SLionel SambucWe must check the return value of <tt>deflateInit()</tt> against the <em>zlib</em> constant
111*44bedb31SLionel Sambuc<tt>Z_OK</tt> to make sure that it was able to
112*44bedb31SLionel Sambucallocate memory for the internal state, and that the provided arguments were valid.
113*44bedb31SLionel Sambuc<tt>deflateInit()</tt> will also check that the version of <em>zlib</em> that the <tt>zlib.h</tt>
114*44bedb31SLionel Sambucfile came from matches the version of <em>zlib</em> actually linked with the program.  This
115*44bedb31SLionel Sambucis especially important for environments in which <em>zlib</em> is a shared library.
116*44bedb31SLionel Sambuc<p>
117*44bedb31SLionel SambucNote that an application can initialize multiple, independent <em>zlib</em> streams, which can
118*44bedb31SLionel Sambucoperate in parallel.  The state information maintained in the structure allows the <em>zlib</em>
119*44bedb31SLionel Sambucroutines to be reentrant.
120*44bedb31SLionel Sambuc<pre><b>
121*44bedb31SLionel Sambuc    /* allocate deflate state */
122*44bedb31SLionel Sambuc    strm.zalloc = Z_NULL;
123*44bedb31SLionel Sambuc    strm.zfree = Z_NULL;
124*44bedb31SLionel Sambuc    strm.opaque = Z_NULL;
125*44bedb31SLionel Sambuc    ret = deflateInit(&amp;strm, level);
126*44bedb31SLionel Sambuc    if (ret != Z_OK)
127*44bedb31SLionel Sambuc        return ret;
128*44bedb31SLionel Sambuc</b></pre><!-- -->
129*44bedb31SLionel SambucWith the pleasantries out of the way, now we can get down to business.  The outer <tt>do</tt>-loop
130*44bedb31SLionel Sambucreads all of the input file and exits at the bottom of the loop once end-of-file is reached.
131*44bedb31SLionel SambucThis loop contains the only call of <tt>deflate()</tt>.  So we must make sure that all of the
132*44bedb31SLionel Sambucinput data has been processed and that all of the output data has been generated and consumed
133*44bedb31SLionel Sambucbefore we fall out of the loop at the bottom.
134*44bedb31SLionel Sambuc<pre><b>
135*44bedb31SLionel Sambuc    /* compress until end of file */
136*44bedb31SLionel Sambuc    do {
137*44bedb31SLionel Sambuc</b></pre>
138*44bedb31SLionel SambucWe start off by reading data from the input file.  The number of bytes read is put directly
139*44bedb31SLionel Sambucinto <tt>avail_in</tt>, and a pointer to those bytes is put into <tt>next_in</tt>.  We also
140*44bedb31SLionel Sambuccheck to see if end-of-file on the input has been reached.  If we are at the end of file, then <tt>flush</tt> is set to the
141*44bedb31SLionel Sambuc<em>zlib</em> constant <tt>Z_FINISH</tt>, which is later passed to <tt>deflate()</tt> to
142*44bedb31SLionel Sambucindicate that this is the last chunk of input data to compress.  We need to use <tt>feof()</tt>
143*44bedb31SLionel Sambucto check for end-of-file as opposed to seeing if fewer than <tt>CHUNK</tt> bytes have been read.  The
144*44bedb31SLionel Sambucreason is that if the input file length is an exact multiple of <tt>CHUNK</tt>, we will miss
145*44bedb31SLionel Sambucthe fact that we got to the end-of-file, and not know to tell <tt>deflate()</tt> to finish
146*44bedb31SLionel Sambucup the compressed stream.  If we are not yet at the end of the input, then the <em>zlib</em>
147*44bedb31SLionel Sambucconstant <tt>Z_NO_FLUSH</tt> will be passed to <tt>deflate()</tt> to indicate that we are still
148*44bedb31SLionel Sambucin the middle of the uncompressed data.
149*44bedb31SLionel Sambuc<p>
150*44bedb31SLionel SambucIf there is an error in reading from the input file, the process is aborted with
151*44bedb31SLionel Sambuc<tt>deflateEnd()</tt> being called to free the allocated <em>zlib</em> state before returning
152*44bedb31SLionel Sambucthe error.  We wouldn't want a memory leak, now would we?  <tt>deflateEnd()</tt> can be called
153*44bedb31SLionel Sambucat any time after the state has been initialized.  Once that's done, <tt>deflateInit()</tt> (or
154*44bedb31SLionel Sambuc<tt>deflateInit2()</tt>) would have to be called to start a new compression process.  There is
155*44bedb31SLionel Sambucno point here in checking the <tt>deflateEnd()</tt> return code.  The deallocation can't fail.
156*44bedb31SLionel Sambuc<pre><b>
157*44bedb31SLionel Sambuc        strm.avail_in = fread(in, 1, CHUNK, source);
158*44bedb31SLionel Sambuc        if (ferror(source)) {
159*44bedb31SLionel Sambuc            (void)deflateEnd(&amp;strm);
160*44bedb31SLionel Sambuc            return Z_ERRNO;
161*44bedb31SLionel Sambuc        }
162*44bedb31SLionel Sambuc        flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
163*44bedb31SLionel Sambuc        strm.next_in = in;
164*44bedb31SLionel Sambuc</b></pre><!-- -->
165*44bedb31SLionel SambucThe inner <tt>do</tt>-loop passes our chunk of input data to <tt>deflate()</tt>, and then
166*44bedb31SLionel Sambuckeeps calling <tt>deflate()</tt> until it is done producing output.  Once there is no more
167*44bedb31SLionel Sambucnew output, <tt>deflate()</tt> is guaranteed to have consumed all of the input, i.e.,
168*44bedb31SLionel Sambuc<tt>avail_in</tt> will be zero.
169*44bedb31SLionel Sambuc<pre><b>
170*44bedb31SLionel Sambuc        /* run deflate() on input until output buffer not full, finish
171*44bedb31SLionel Sambuc           compression if all of source has been read in */
172*44bedb31SLionel Sambuc        do {
173*44bedb31SLionel Sambuc</b></pre>
174*44bedb31SLionel SambucOutput space is provided to <tt>deflate()</tt> by setting <tt>avail_out</tt> to the number
175*44bedb31SLionel Sambucof available output bytes and <tt>next_out</tt> to a pointer to that space.
176*44bedb31SLionel Sambuc<pre><b>
177*44bedb31SLionel Sambuc            strm.avail_out = CHUNK;
178*44bedb31SLionel Sambuc            strm.next_out = out;
179*44bedb31SLionel Sambuc</b></pre>
180*44bedb31SLionel SambucNow we call the compression engine itself, <tt>deflate()</tt>.  It takes as many of the
181*44bedb31SLionel Sambuc<tt>avail_in</tt> bytes at <tt>next_in</tt> as it can process, and writes as many as
182*44bedb31SLionel Sambuc<tt>avail_out</tt> bytes to <tt>next_out</tt>.  Those counters and pointers are then
183*44bedb31SLionel Sambucupdated past the input data consumed and the output data written.  It is the amount of
184*44bedb31SLionel Sambucoutput space available that may limit how much input is consumed.
185*44bedb31SLionel SambucHence the inner loop to make sure that
186*44bedb31SLionel Sambucall of the input is consumed by providing more output space each time.  Since <tt>avail_in</tt>
187*44bedb31SLionel Sambucand <tt>next_in</tt> are updated by <tt>deflate()</tt>, we don't have to mess with those
188*44bedb31SLionel Sambucbetween <tt>deflate()</tt> calls until it's all used up.
189*44bedb31SLionel Sambuc<p>
190*44bedb31SLionel SambucThe parameters to <tt>deflate()</tt> are a pointer to the <tt>strm</tt> structure containing
191*44bedb31SLionel Sambucthe input and output information and the internal compression engine state, and a parameter
192*44bedb31SLionel Sambucindicating whether and how to flush data to the output.  Normally <tt>deflate()</tt> will consume
193*44bedb31SLionel Sambucseveral K bytes of input data before producing any output (except for the header), in order
194*44bedb31SLionel Sambucto accumulate statistics on the data for optimum compression.  It will then put out a burst of
195*44bedb31SLionel Sambuccompressed data, and proceed to consume more input before the next burst.  Eventually,
196*44bedb31SLionel Sambuc<tt>deflate()</tt>
197*44bedb31SLionel Sambucmust be told to terminate the stream, complete the compression with provided input data, and
198*44bedb31SLionel Sambucwrite out the trailer check value.  <tt>deflate()</tt> will continue to compress normally as long
199*44bedb31SLionel Sambucas the flush parameter is <tt>Z_NO_FLUSH</tt>.  Once the <tt>Z_FINISH</tt> parameter is provided,
200*44bedb31SLionel Sambuc<tt>deflate()</tt> will begin to complete the compressed output stream.  However depending on how
201*44bedb31SLionel Sambucmuch output space is provided, <tt>deflate()</tt> may have to be called several times until it
202*44bedb31SLionel Sambuchas provided the complete compressed stream, even after it has consumed all of the input.  The flush
203*44bedb31SLionel Sambucparameter must continue to be <tt>Z_FINISH</tt> for those subsequent calls.
204*44bedb31SLionel Sambuc<p>
205*44bedb31SLionel SambucThere are other values of the flush parameter that are used in more advanced applications.  You can
206*44bedb31SLionel Sambucforce <tt>deflate()</tt> to produce a burst of output that encodes all of the input data provided
207*44bedb31SLionel Sambucso far, even if it wouldn't have otherwise, for example to control data latency on a link with
208*44bedb31SLionel Sambuccompressed data.  You can also ask that <tt>deflate()</tt> do that as well as erase any history up to
209*44bedb31SLionel Sambucthat point so that what follows can be decompressed independently, for example for random access
210*44bedb31SLionel Sambucapplications.  Both requests will degrade compression by an amount depending on how often such
211*44bedb31SLionel Sambucrequests are made.
212*44bedb31SLionel Sambuc<p>
213*44bedb31SLionel Sambuc<tt>deflate()</tt> has a return value that can indicate errors, yet we do not check it here.  Why
214*44bedb31SLionel Sambucnot?  Well, it turns out that <tt>deflate()</tt> can do no wrong here.  Let's go through
215*44bedb31SLionel Sambuc<tt>deflate()</tt>'s return values and dispense with them one by one.  The possible values are
216*44bedb31SLionel Sambuc<tt>Z_OK</tt>, <tt>Z_STREAM_END</tt>, <tt>Z_STREAM_ERROR</tt>, or <tt>Z_BUF_ERROR</tt>.  <tt>Z_OK</tt>
217*44bedb31SLionel Sambucis, well, ok.  <tt>Z_STREAM_END</tt> is also ok and will be returned for the last call of
218*44bedb31SLionel Sambuc<tt>deflate()</tt>.  This is already guaranteed by calling <tt>deflate()</tt> with <tt>Z_FINISH</tt>
219*44bedb31SLionel Sambucuntil it has no more output.  <tt>Z_STREAM_ERROR</tt> is only possible if the stream is not
220*44bedb31SLionel Sambucinitialized properly, but we did initialize it properly.  There is no harm in checking for
221*44bedb31SLionel Sambuc<tt>Z_STREAM_ERROR</tt> here, for example to check for the possibility that some
222*44bedb31SLionel Sambucother part of the application inadvertently clobbered the memory containing the <em>zlib</em> state.
223*44bedb31SLionel Sambuc<tt>Z_BUF_ERROR</tt> will be explained further below, but
224*44bedb31SLionel Sambucsuffice it to say that this is simply an indication that <tt>deflate()</tt> could not consume
225*44bedb31SLionel Sambucmore input or produce more output.  <tt>deflate()</tt> can be called again with more output space
226*44bedb31SLionel Sambucor more available input, which it will be in this code.
227*44bedb31SLionel Sambuc<pre><b>
228*44bedb31SLionel Sambuc            ret = deflate(&amp;strm, flush);    /* no bad return value */
229*44bedb31SLionel Sambuc            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
230*44bedb31SLionel Sambuc</b></pre>
231*44bedb31SLionel SambucNow we compute how much output <tt>deflate()</tt> provided on the last call, which is the
232*44bedb31SLionel Sambucdifference between how much space was provided before the call, and how much output space
233*44bedb31SLionel Sambucis still available after the call.  Then that data, if any, is written to the output file.
234*44bedb31SLionel SambucWe can then reuse the output buffer for the next call of <tt>deflate()</tt>.  Again if there
235*44bedb31SLionel Sambucis a file i/o error, we call <tt>deflateEnd()</tt> before returning to avoid a memory leak.
236*44bedb31SLionel Sambuc<pre><b>
237*44bedb31SLionel Sambuc            have = CHUNK - strm.avail_out;
238*44bedb31SLionel Sambuc            if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
239*44bedb31SLionel Sambuc                (void)deflateEnd(&amp;strm);
240*44bedb31SLionel Sambuc                return Z_ERRNO;
241*44bedb31SLionel Sambuc            }
242*44bedb31SLionel Sambuc</b></pre>
243*44bedb31SLionel SambucThe inner <tt>do</tt>-loop is repeated until the last <tt>deflate()</tt> call fails to fill the
244*44bedb31SLionel Sambucprovided output buffer.  Then we know that <tt>deflate()</tt> has done as much as it can with
245*44bedb31SLionel Sambucthe provided input, and that all of that input has been consumed.  We can then fall out of this
246*44bedb31SLionel Sambucloop and reuse the input buffer.
247*44bedb31SLionel Sambuc<p>
248*44bedb31SLionel SambucThe way we tell that <tt>deflate()</tt> has no more output is by seeing that it did not fill
249*44bedb31SLionel Sambucthe output buffer, leaving <tt>avail_out</tt> greater than zero.  However suppose that
250*44bedb31SLionel Sambuc<tt>deflate()</tt> has no more output, but just so happened to exactly fill the output buffer!
251*44bedb31SLionel Sambuc<tt>avail_out</tt> is zero, and we can't tell that <tt>deflate()</tt> has done all it can.
252*44bedb31SLionel SambucAs far as we know, <tt>deflate()</tt>
253*44bedb31SLionel Sambuchas more output for us.  So we call it again.  But now <tt>deflate()</tt> produces no output
254*44bedb31SLionel Sambucat all, and <tt>avail_out</tt> remains unchanged as <tt>CHUNK</tt>.  That <tt>deflate()</tt> call
255*44bedb31SLionel Sambucwasn't able to do anything, either consume input or produce output, and so it returns
256*44bedb31SLionel Sambuc<tt>Z_BUF_ERROR</tt>.  (See, I told you I'd cover this later.)  However this is not a problem at
257*44bedb31SLionel Sambucall.  Now we finally have the desired indication that <tt>deflate()</tt> is really done,
258*44bedb31SLionel Sambucand so we drop out of the inner loop to provide more input to <tt>deflate()</tt>.
259*44bedb31SLionel Sambuc<p>
260*44bedb31SLionel SambucWith <tt>flush</tt> set to <tt>Z_FINISH</tt>, this final set of <tt>deflate()</tt> calls will
261*44bedb31SLionel Sambuccomplete the output stream.  Once that is done, subsequent calls of <tt>deflate()</tt> would return
262*44bedb31SLionel Sambuc<tt>Z_STREAM_ERROR</tt> if the flush parameter is not <tt>Z_FINISH</tt>, and do no more processing
263*44bedb31SLionel Sambucuntil the state is reinitialized.
264*44bedb31SLionel Sambuc<p>
265*44bedb31SLionel SambucSome applications of <em>zlib</em> have two loops that call <tt>deflate()</tt>
266*44bedb31SLionel Sambucinstead of the single inner loop we have here.  The first loop would call
267*44bedb31SLionel Sambucwithout flushing and feed all of the data to <tt>deflate()</tt>.  The second loop would call
268*44bedb31SLionel Sambuc<tt>deflate()</tt> with no more
269*44bedb31SLionel Sambucdata and the <tt>Z_FINISH</tt> parameter to complete the process.  As you can see from this
270*44bedb31SLionel Sambucexample, that can be avoided by simply keeping track of the current flush state.
271*44bedb31SLionel Sambuc<pre><b>
272*44bedb31SLionel Sambuc        } while (strm.avail_out == 0);
273*44bedb31SLionel Sambuc        assert(strm.avail_in == 0);     /* all input will be used */
274*44bedb31SLionel Sambuc</b></pre><!-- -->
275*44bedb31SLionel SambucNow we check to see if we have already processed all of the input file.  That information was
276*44bedb31SLionel Sambucsaved in the <tt>flush</tt> variable, so we see if that was set to <tt>Z_FINISH</tt>.  If so,
277*44bedb31SLionel Sambucthen we're done and we fall out of the outer loop.  We're guaranteed to get <tt>Z_STREAM_END</tt>
278*44bedb31SLionel Sambucfrom the last <tt>deflate()</tt> call, since we ran it until the last chunk of input was
279*44bedb31SLionel Sambucconsumed and all of the output was generated.
280*44bedb31SLionel Sambuc<pre><b>
281*44bedb31SLionel Sambuc        /* done when last data in file processed */
282*44bedb31SLionel Sambuc    } while (flush != Z_FINISH);
283*44bedb31SLionel Sambuc    assert(ret == Z_STREAM_END);        /* stream will be complete */
284*44bedb31SLionel Sambuc</b></pre><!-- -->
285*44bedb31SLionel SambucThe process is complete, but we still need to deallocate the state to avoid a memory leak
286*44bedb31SLionel Sambuc(or rather more like a memory hemorrhage if you didn't do this).  Then
287*44bedb31SLionel Sambucfinally we can return with a happy return value.
288*44bedb31SLionel Sambuc<pre><b>
289*44bedb31SLionel Sambuc    /* clean up and return */
290*44bedb31SLionel Sambuc    (void)deflateEnd(&amp;strm);
291*44bedb31SLionel Sambuc    return Z_OK;
292*44bedb31SLionel Sambuc}
293*44bedb31SLionel Sambuc</b></pre><!-- -->
294*44bedb31SLionel SambucNow we do the same thing for decompression in the <tt>inf()</tt> routine. <tt>inf()</tt>
295*44bedb31SLionel Sambucdecompresses what is hopefully a valid <em>zlib</em> stream from the input file and writes the
296*44bedb31SLionel Sambucuncompressed data to the output file.  Much of the discussion above for <tt>def()</tt>
297*44bedb31SLionel Sambucapplies to <tt>inf()</tt> as well, so the discussion here will focus on the differences between
298*44bedb31SLionel Sambucthe two.
299*44bedb31SLionel Sambuc<pre><b>
300*44bedb31SLionel Sambuc/* Decompress from file source to file dest until stream ends or EOF.
301*44bedb31SLionel Sambuc   inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
302*44bedb31SLionel Sambuc   allocated for processing, Z_DATA_ERROR if the deflate data is
303*44bedb31SLionel Sambuc   invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
304*44bedb31SLionel Sambuc   the version of the library linked do not match, or Z_ERRNO if there
305*44bedb31SLionel Sambuc   is an error reading or writing the files. */
306*44bedb31SLionel Sambucint inf(FILE *source, FILE *dest)
307*44bedb31SLionel Sambuc{
308*44bedb31SLionel Sambuc</b></pre>
309*44bedb31SLionel SambucThe local variables have the same functionality as they do for <tt>def()</tt>.  The
310*44bedb31SLionel Sambuconly difference is that there is no <tt>flush</tt> variable, since <tt>inflate()</tt>
311*44bedb31SLionel Sambuccan tell from the <em>zlib</em> stream itself when the stream is complete.
312*44bedb31SLionel Sambuc<pre><b>
313*44bedb31SLionel Sambuc    int ret;
314*44bedb31SLionel Sambuc    unsigned have;
315*44bedb31SLionel Sambuc    z_stream strm;
316*44bedb31SLionel Sambuc    char in[CHUNK];
317*44bedb31SLionel Sambuc    char out[CHUNK];
318*44bedb31SLionel Sambuc</b></pre><!-- -->
319*44bedb31SLionel SambucThe initialization of the state is the same, except that there is no compression level,
320*44bedb31SLionel Sambucof course, and two more elements of the structure are initialized.  <tt>avail_in</tt>
321*44bedb31SLionel Sambucand <tt>next_in</tt> must be initialized before calling <tt>inflateInit()</tt>.  This
322*44bedb31SLionel Sambucis because the application has the option to provide the start of the zlib stream in
323*44bedb31SLionel Sambucorder for <tt>inflateInit()</tt> to have access to information about the compression
324*44bedb31SLionel Sambucmethod to aid in memory allocation.  In the current implementation of <em>zlib</em>
325*44bedb31SLionel Sambuc(up through versions 1.2.x), the method-dependent memory allocations are deferred to the first call of
326*44bedb31SLionel Sambuc<tt>inflate()</tt> anyway.  However those fields must be initialized since later versions
327*44bedb31SLionel Sambucof <em>zlib</em> that provide more compression methods may take advantage of this interface.
328*44bedb31SLionel SambucIn any case, no decompression is performed by <tt>inflateInit()</tt>, so the
329*44bedb31SLionel Sambuc<tt>avail_out</tt> and <tt>next_out</tt> fields do not need to be initialized before calling.
330*44bedb31SLionel Sambuc<p>
331*44bedb31SLionel SambucHere <tt>avail_in</tt> is set to zero and <tt>next_in</tt> is set to <tt>Z_NULL</tt> to
332*44bedb31SLionel Sambucindicate that no input data is being provided.
333*44bedb31SLionel Sambuc<pre><b>
334*44bedb31SLionel Sambuc    /* allocate inflate state */
335*44bedb31SLionel Sambuc    strm.zalloc = Z_NULL;
336*44bedb31SLionel Sambuc    strm.zfree = Z_NULL;
337*44bedb31SLionel Sambuc    strm.opaque = Z_NULL;
338*44bedb31SLionel Sambuc    strm.avail_in = 0;
339*44bedb31SLionel Sambuc    strm.next_in = Z_NULL;
340*44bedb31SLionel Sambuc    ret = inflateInit(&amp;strm);
341*44bedb31SLionel Sambuc    if (ret != Z_OK)
342*44bedb31SLionel Sambuc        return ret;
343*44bedb31SLionel Sambuc</b></pre><!-- -->
344*44bedb31SLionel SambucThe outer <tt>do</tt>-loop decompresses input until <tt>inflate()</tt> indicates
345*44bedb31SLionel Sambucthat it has reached the end of the compressed data and has produced all of the uncompressed
346*44bedb31SLionel Sambucoutput.  This is in contrast to <tt>def()</tt> which processes all of the input file.
347*44bedb31SLionel SambucIf end-of-file is reached before the compressed data self-terminates, then the compressed
348*44bedb31SLionel Sambucdata is incomplete and an error is returned.
349*44bedb31SLionel Sambuc<pre><b>
350*44bedb31SLionel Sambuc    /* decompress until deflate stream ends or end of file */
351*44bedb31SLionel Sambuc    do {
352*44bedb31SLionel Sambuc</b></pre>
353*44bedb31SLionel SambucWe read input data and set the <tt>strm</tt> structure accordingly.  If we've reached the
354*44bedb31SLionel Sambucend of the input file, then we leave the outer loop and report an error, since the
355*44bedb31SLionel Sambuccompressed data is incomplete.  Note that we may read more data than is eventually consumed
356*44bedb31SLionel Sambucby <tt>inflate()</tt>, if the input file continues past the <em>zlib</em> stream.
357*44bedb31SLionel SambucFor applications where <em>zlib</em> streams are embedded in other data, this routine would
358*44bedb31SLionel Sambucneed to be modified to return the unused data, or at least indicate how much of the input
359*44bedb31SLionel Sambucdata was not used, so the application would know where to pick up after the <em>zlib</em> stream.
360*44bedb31SLionel Sambuc<pre><b>
361*44bedb31SLionel Sambuc        strm.avail_in = fread(in, 1, CHUNK, source);
362*44bedb31SLionel Sambuc        if (ferror(source)) {
363*44bedb31SLionel Sambuc            (void)inflateEnd(&amp;strm);
364*44bedb31SLionel Sambuc            return Z_ERRNO;
365*44bedb31SLionel Sambuc        }
366*44bedb31SLionel Sambuc        if (strm.avail_in == 0)
367*44bedb31SLionel Sambuc            break;
368*44bedb31SLionel Sambuc        strm.next_in = in;
369*44bedb31SLionel Sambuc</b></pre><!-- -->
370*44bedb31SLionel SambucThe inner <tt>do</tt>-loop has the same function it did in <tt>def()</tt>, which is to
371*44bedb31SLionel Sambuckeep calling <tt>inflate()</tt> until it has generated all of the output it can with the
372*44bedb31SLionel Sambucprovided input.
373*44bedb31SLionel Sambuc<pre><b>
374*44bedb31SLionel Sambuc        /* run inflate() on input until output buffer not full */
375*44bedb31SLionel Sambuc        do {
376*44bedb31SLionel Sambuc</b></pre>
377*44bedb31SLionel SambucJust like in <tt>def()</tt>, the same output space is provided for each call of <tt>inflate()</tt>.
378*44bedb31SLionel Sambuc<pre><b>
379*44bedb31SLionel Sambuc            strm.avail_out = CHUNK;
380*44bedb31SLionel Sambuc            strm.next_out = out;
381*44bedb31SLionel Sambuc</b></pre>
382*44bedb31SLionel SambucNow we run the decompression engine itself.  There is no need to adjust the flush parameter, since
383*44bedb31SLionel Sambucthe <em>zlib</em> format is self-terminating. The main difference here is that there are
384*44bedb31SLionel Sambucreturn values that we need to pay attention to.  <tt>Z_DATA_ERROR</tt>
385*44bedb31SLionel Sambucindicates that <tt>inflate()</tt> detected an error in the <em>zlib</em> compressed data format,
386*44bedb31SLionel Sambucwhich means that either the data is not a <em>zlib</em> stream to begin with, or that the data was
387*44bedb31SLionel Sambuccorrupted somewhere along the way since it was compressed.  The other error to be processed is
388*44bedb31SLionel Sambuc<tt>Z_MEM_ERROR</tt>, which can occur since memory allocation is deferred until <tt>inflate()</tt>
389*44bedb31SLionel Sambucneeds it, unlike <tt>deflate()</tt>, whose memory is allocated at the start by <tt>deflateInit()</tt>.
390*44bedb31SLionel Sambuc<p>
391*44bedb31SLionel SambucAdvanced applications may use
392*44bedb31SLionel Sambuc<tt>deflateSetDictionary()</tt> to prime <tt>deflate()</tt> with a set of likely data to improve the
393*44bedb31SLionel Sambucfirst 32K or so of compression.  This is noted in the <em>zlib</em> header, so <tt>inflate()</tt>
394*44bedb31SLionel Sambucrequests that the same dictionary be provided before it can start to decompress.  Without the dictionary,
395*44bedb31SLionel Sambuccorrect decompression is not possible.  For this routine, we have no idea what the dictionary is,
396*44bedb31SLionel Sambucso the <tt>Z_NEED_DICT</tt> indication is converted to a <tt>Z_DATA_ERROR</tt>.
397*44bedb31SLionel Sambuc<p>
398*44bedb31SLionel Sambuc<tt>inflate()</tt> can also return <tt>Z_STREAM_ERROR</tt>, which should not be possible here,
399*44bedb31SLionel Sambucbut could be checked for as noted above for <tt>def()</tt>.  <tt>Z_BUF_ERROR</tt> does not need to be
400*44bedb31SLionel Sambucchecked for here, for the same reasons noted for <tt>def()</tt>.  <tt>Z_STREAM_END</tt> will be
401*44bedb31SLionel Sambucchecked for later.
402*44bedb31SLionel Sambuc<pre><b>
403*44bedb31SLionel Sambuc            ret = inflate(&amp;strm, Z_NO_FLUSH);
404*44bedb31SLionel Sambuc            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
405*44bedb31SLionel Sambuc            switch (ret) {
406*44bedb31SLionel Sambuc            case Z_NEED_DICT:
407*44bedb31SLionel Sambuc                ret = Z_DATA_ERROR;     /* and fall through */
408*44bedb31SLionel Sambuc            case Z_DATA_ERROR:
409*44bedb31SLionel Sambuc            case Z_MEM_ERROR:
410*44bedb31SLionel Sambuc                (void)inflateEnd(&amp;strm);
411*44bedb31SLionel Sambuc                return ret;
412*44bedb31SLionel Sambuc            }
413*44bedb31SLionel Sambuc</b></pre>
414*44bedb31SLionel SambucThe output of <tt>inflate()</tt> is handled identically to that of <tt>deflate()</tt>.
415*44bedb31SLionel Sambuc<pre><b>
416*44bedb31SLionel Sambuc            have = CHUNK - strm.avail_out;
417*44bedb31SLionel Sambuc            if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
418*44bedb31SLionel Sambuc                (void)inflateEnd(&amp;strm);
419*44bedb31SLionel Sambuc                return Z_ERRNO;
420*44bedb31SLionel Sambuc            }
421*44bedb31SLionel Sambuc</b></pre>
422*44bedb31SLionel SambucThe inner <tt>do</tt>-loop ends when <tt>inflate()</tt> has no more output as indicated
423*44bedb31SLionel Sambucby not filling the output buffer, just as for <tt>deflate()</tt>.  In this case, we cannot
424*44bedb31SLionel Sambucassert that <tt>strm.avail_in</tt> will be zero, since the deflate stream may end before the file
425*44bedb31SLionel Sambucdoes.
426*44bedb31SLionel Sambuc<pre><b>
427*44bedb31SLionel Sambuc        } while (strm.avail_out == 0);
428*44bedb31SLionel Sambuc</b></pre><!-- -->
429*44bedb31SLionel SambucThe outer <tt>do</tt>-loop ends when <tt>inflate()</tt> reports that it has reached the
430*44bedb31SLionel Sambucend of the input <em>zlib</em> stream, has completed the decompression and integrity
431*44bedb31SLionel Sambuccheck, and has provided all of the output.  This is indicated by the <tt>inflate()</tt>
432*44bedb31SLionel Sambucreturn value <tt>Z_STREAM_END</tt>.  The inner loop is guaranteed to leave <tt>ret</tt>
433*44bedb31SLionel Sambucequal to <tt>Z_STREAM_END</tt> if the last chunk of the input file read contained the end
434*44bedb31SLionel Sambucof the <em>zlib</em> stream.  So if the return value is not <tt>Z_STREAM_END</tt>, the
435*44bedb31SLionel Sambucloop continues to read more input.
436*44bedb31SLionel Sambuc<pre><b>
437*44bedb31SLionel Sambuc        /* done when inflate() says it's done */
438*44bedb31SLionel Sambuc    } while (ret != Z_STREAM_END);
439*44bedb31SLionel Sambuc</b></pre><!-- -->
440*44bedb31SLionel SambucAt this point, either decompression completed successfully, or we broke out of the loop due to no
441*44bedb31SLionel Sambucmore data being available from the input file.  If the last <tt>inflate()</tt> return value
442*44bedb31SLionel Sambucis not <tt>Z_STREAM_END</tt>, then the <em>zlib</em> stream was incomplete and a data error
443*44bedb31SLionel Sambucis returned.  Otherwise, we return with a happy return value.  Of course, <tt>inflateEnd()</tt>
444*44bedb31SLionel Sambucis called first to avoid a memory leak.
445*44bedb31SLionel Sambuc<pre><b>
446*44bedb31SLionel Sambuc    /* clean up and return */
447*44bedb31SLionel Sambuc    (void)inflateEnd(&amp;strm);
448*44bedb31SLionel Sambuc    return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
449*44bedb31SLionel Sambuc}
450*44bedb31SLionel Sambuc</b></pre><!-- -->
451*44bedb31SLionel SambucThat ends the routines that directly use <em>zlib</em>.  The following routines make this
452*44bedb31SLionel Sambuca command-line program by running data through the above routines from <tt>stdin</tt> to
453*44bedb31SLionel Sambuc<tt>stdout</tt>, and handling any errors reported by <tt>def()</tt> or <tt>inf()</tt>.
454*44bedb31SLionel Sambuc<p>
455*44bedb31SLionel Sambuc<tt>zerr()</tt> is used to interpret the possible error codes from <tt>def()</tt>
456*44bedb31SLionel Sambucand <tt>inf()</tt>, as detailed in their comments above, and print out an error message.
457*44bedb31SLionel SambucNote that these are only a subset of the possible return values from <tt>deflate()</tt>
458*44bedb31SLionel Sambucand <tt>inflate()</tt>.
459*44bedb31SLionel Sambuc<pre><b>
460*44bedb31SLionel Sambuc/* report a zlib or i/o error */
461*44bedb31SLionel Sambucvoid zerr(int ret)
462*44bedb31SLionel Sambuc{
463*44bedb31SLionel Sambuc    fputs("zpipe: ", stderr);
464*44bedb31SLionel Sambuc    switch (ret) {
465*44bedb31SLionel Sambuc    case Z_ERRNO:
466*44bedb31SLionel Sambuc        if (ferror(stdin))
467*44bedb31SLionel Sambuc            fputs("error reading stdin\n", stderr);
468*44bedb31SLionel Sambuc        if (ferror(stdout))
469*44bedb31SLionel Sambuc            fputs("error writing stdout\n", stderr);
470*44bedb31SLionel Sambuc        break;
471*44bedb31SLionel Sambuc    case Z_STREAM_ERROR:
472*44bedb31SLionel Sambuc        fputs("invalid compression level\n", stderr);
473*44bedb31SLionel Sambuc        break;
474*44bedb31SLionel Sambuc    case Z_DATA_ERROR:
475*44bedb31SLionel Sambuc        fputs("invalid or incomplete deflate data\n", stderr);
476*44bedb31SLionel Sambuc        break;
477*44bedb31SLionel Sambuc    case Z_MEM_ERROR:
478*44bedb31SLionel Sambuc        fputs("out of memory\n", stderr);
479*44bedb31SLionel Sambuc        break;
480*44bedb31SLionel Sambuc    case Z_VERSION_ERROR:
481*44bedb31SLionel Sambuc        fputs("zlib version mismatch!\n", stderr);
482*44bedb31SLionel Sambuc    }
483*44bedb31SLionel Sambuc}
484*44bedb31SLionel Sambuc</b></pre><!-- -->
485*44bedb31SLionel SambucHere is the <tt>main()</tt> routine used to test <tt>def()</tt> and <tt>inf()</tt>.  The
486*44bedb31SLionel Sambuc<tt>zpipe</tt> command is simply a compression pipe from <tt>stdin</tt> to <tt>stdout</tt>, if
487*44bedb31SLionel Sambucno arguments are given, or it is a decompression pipe if <tt>zpipe -d</tt> is used.  If any other
488*44bedb31SLionel Sambucarguments are provided, no compression or decompression is performed.  Instead a usage
489*44bedb31SLionel Sambucmessage is displayed.  Examples are <tt>zpipe &lt; foo.txt &gt; foo.txt.z</tt> to compress, and
490*44bedb31SLionel Sambuc<tt>zpipe -d &lt; foo.txt.z &gt; foo.txt</tt> to decompress.
491*44bedb31SLionel Sambuc<pre><b>
492*44bedb31SLionel Sambuc/* compress or decompress from stdin to stdout */
493*44bedb31SLionel Sambucint main(int argc, char **argv)
494*44bedb31SLionel Sambuc{
495*44bedb31SLionel Sambuc    int ret;
496*44bedb31SLionel Sambuc
497*44bedb31SLionel Sambuc    /* do compression if no arguments */
498*44bedb31SLionel Sambuc    if (argc == 1) {
499*44bedb31SLionel Sambuc        ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
500*44bedb31SLionel Sambuc        if (ret != Z_OK)
501*44bedb31SLionel Sambuc            zerr(ret);
502*44bedb31SLionel Sambuc        return ret;
503*44bedb31SLionel Sambuc    }
504*44bedb31SLionel Sambuc
505*44bedb31SLionel Sambuc    /* do decompression if -d specified */
506*44bedb31SLionel Sambuc    else if (argc == 2 &amp;&amp; strcmp(argv[1], "-d") == 0) {
507*44bedb31SLionel Sambuc        ret = inf(stdin, stdout);
508*44bedb31SLionel Sambuc        if (ret != Z_OK)
509*44bedb31SLionel Sambuc            zerr(ret);
510*44bedb31SLionel Sambuc        return ret;
511*44bedb31SLionel Sambuc    }
512*44bedb31SLionel Sambuc
513*44bedb31SLionel Sambuc    /* otherwise, report usage */
514*44bedb31SLionel Sambuc    else {
515*44bedb31SLionel Sambuc        fputs("zpipe usage: zpipe [-d] &lt; source &gt; dest\n", stderr);
516*44bedb31SLionel Sambuc        return 1;
517*44bedb31SLionel Sambuc    }
518*44bedb31SLionel Sambuc}
519*44bedb31SLionel Sambuc</b></pre>
520*44bedb31SLionel Sambuc<hr>
521*44bedb31SLionel Sambuc<i>Copyright (c) 2004 by Mark Adler<br>Last modified 13 November 2004</i>
522*44bedb31SLionel Sambuc</body>
523*44bedb31SLionel Sambuc</html>
524