xref: /netbsd-src/common/dist/zlib/examples/zran.c (revision b175d1c2a0d8a7ee59df83b5ae5f0bd11632ced6)
1*b175d1c2Schristos /* zran.c -- example of deflate stream indexing and random access
2*b175d1c2Schristos  * Copyright (C) 2005, 2012, 2018, 2023 Mark Adler
3aaf4ece6Schristos  * For conditions of distribution and use, see copyright notice in zlib.h
4*b175d1c2Schristos  * Version 1.4  13 Apr 2023  Mark Adler */
5c3423655Schristos 
6c3423655Schristos /* Version History:
7c3423655Schristos  1.0  29 May 2005  First version
8c3423655Schristos  1.1  29 Sep 2012  Fix memory reallocation error
9ec47cc4bSchristos  1.2  14 Oct 2018  Handle gzip streams with multiple members
10ec47cc4bSchristos                    Add a header file to facilitate usage in applications
11*b175d1c2Schristos  1.3  18 Feb 2023  Permit raw deflate streams as well as zlib and gzip
12*b175d1c2Schristos                    Permit crossing gzip member boundaries when extracting
13*b175d1c2Schristos                    Support a size_t size when extracting (was an int)
14*b175d1c2Schristos                    Do a binary search over the index for an access point
15*b175d1c2Schristos                    Expose the access point type to enable save and load
16*b175d1c2Schristos  1.4  13 Apr 2023  Add a NOPRIME define to not use inflatePrime()
17c3423655Schristos  */
18aaf4ece6Schristos 
19*b175d1c2Schristos // Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary()
20*b175d1c2Schristos // for random access of a compressed file. A file containing a raw deflate
21*b175d1c2Schristos // stream is provided on the command line. The compressed stream is decoded in
22*b175d1c2Schristos // its entirety, and an index built with access points about every SPAN bytes
23*b175d1c2Schristos // in the uncompressed output. The compressed file is left open, and can then
24*b175d1c2Schristos // be read randomly, having to decompress on the average SPAN/2 uncompressed
25*b175d1c2Schristos // bytes before getting to the desired block of data.
26*b175d1c2Schristos //
27*b175d1c2Schristos // An access point can be created at the start of any deflate block, by saving
28*b175d1c2Schristos // the starting file offset and bit of that block, and the 32K bytes of
29*b175d1c2Schristos // uncompressed data that precede that block. Also the uncompressed offset of
30*b175d1c2Schristos // that block is saved to provide a reference for locating a desired starting
31*b175d1c2Schristos // point in the uncompressed stream. deflate_index_build() decompresses the
32*b175d1c2Schristos // input raw deflate stream a block at a time, and at the end of each block
33*b175d1c2Schristos // decides if enough uncompressed data has gone by to justify the creation of a
34*b175d1c2Schristos // new access point. If so, that point is saved in a data structure that grows
35*b175d1c2Schristos // as needed to accommodate the points.
36*b175d1c2Schristos //
37*b175d1c2Schristos // To use the index, an offset in the uncompressed data is provided, for which
38*b175d1c2Schristos // the latest access point at or preceding that offset is located in the index.
39*b175d1c2Schristos // The input file is positioned to the specified location in the index, and if
40*b175d1c2Schristos // necessary the first few bits of the compressed data is read from the file.
41*b175d1c2Schristos // inflate is initialized with those bits and the 32K of uncompressed data, and
42*b175d1c2Schristos // decompression then proceeds until the desired offset in the file is reached.
43*b175d1c2Schristos // Then decompression continues to read the requested uncompressed data from
44*b175d1c2Schristos // the file.
45*b175d1c2Schristos //
46*b175d1c2Schristos // There is some fair bit of overhead to starting inflation for the random
47*b175d1c2Schristos // access, mainly copying the 32K byte dictionary. If small pieces of the file
48*b175d1c2Schristos // are being accessed, it would make sense to implement a cache to hold some
49*b175d1c2Schristos // lookahead to avoid many calls to deflate_index_extract() for small lengths.
50*b175d1c2Schristos //
51*b175d1c2Schristos // Another way to build an index would be to use inflateCopy(). That would not
52*b175d1c2Schristos // be constrained to have access points at block boundaries, but would require
53*b175d1c2Schristos // more memory per access point, and could not be saved to a file due to the
54*b175d1c2Schristos // use of pointers in the state. The approach here allows for storage of the
55*b175d1c2Schristos // index in a file.
56aaf4ece6Schristos 
57aaf4ece6Schristos #include <stdio.h>
58aaf4ece6Schristos #include <stdlib.h>
59aaf4ece6Schristos #include <string.h>
60*b175d1c2Schristos #include <limits.h>
61aaf4ece6Schristos #include "zlib.h"
62ec47cc4bSchristos #include "zran.h"
63aaf4ece6Schristos 
64*b175d1c2Schristos #define WINSIZE 32768U      // sliding window size
65*b175d1c2Schristos #define CHUNK 16384         // file input buffer size
66aaf4ece6Schristos 
67*b175d1c2Schristos // See comments in zran.h.
68*b175d1c2Schristos void deflate_index_free(struct deflate_index *index) {
69aaf4ece6Schristos     if (index != NULL) {
70aaf4ece6Schristos         free(index->list);
71aaf4ece6Schristos         free(index);
72aaf4ece6Schristos     }
73aaf4ece6Schristos }
74aaf4ece6Schristos 
75*b175d1c2Schristos // Add an access point to the list. If out of memory, deallocate the existing
76*b175d1c2Schristos // list and return NULL. index->mode is temporarily the allocated number of
77*b175d1c2Schristos // access points, until it is time for deflate_index_build() to return. Then
78*b175d1c2Schristos // index->mode is set to the mode of inflation.
79*b175d1c2Schristos static struct deflate_index *add_point(struct deflate_index *index, int bits,
80ec47cc4bSchristos                                        off_t in, off_t out, unsigned left,
81*b175d1c2Schristos                                        unsigned char *window) {
82aaf4ece6Schristos     if (index == NULL) {
83*b175d1c2Schristos         // The list is empty. Create it, starting with eight access points.
84ec47cc4bSchristos         index = malloc(sizeof(struct deflate_index));
85*b175d1c2Schristos         if (index == NULL)
86*b175d1c2Schristos             return NULL;
87*b175d1c2Schristos         index->have = 0;
88*b175d1c2Schristos         index->mode = 8;
89*b175d1c2Schristos         index->list = malloc(sizeof(point_t) * index->mode);
90aaf4ece6Schristos         if (index->list == NULL) {
91aaf4ece6Schristos             free(index);
92aaf4ece6Schristos             return NULL;
93aaf4ece6Schristos         }
94aaf4ece6Schristos     }
95aaf4ece6Schristos 
96*b175d1c2Schristos     else if (index->have == index->mode) {
97*b175d1c2Schristos         // The list is full. Make it bigger.
98*b175d1c2Schristos         index->mode <<= 1;
99*b175d1c2Schristos         point_t *next = realloc(index->list, sizeof(point_t) * index->mode);
100aaf4ece6Schristos         if (next == NULL) {
101ec47cc4bSchristos             deflate_index_free(index);
102aaf4ece6Schristos             return NULL;
103aaf4ece6Schristos         }
104aaf4ece6Schristos         index->list = next;
105aaf4ece6Schristos     }
106aaf4ece6Schristos 
107*b175d1c2Schristos     // Fill in the access point and increment how many we have.
108*b175d1c2Schristos     point_t *next = (point_t *)(index->list) + index->have++;
109*b175d1c2Schristos     if (index->have < 0) {
110*b175d1c2Schristos         // Overflowed the int!
111*b175d1c2Schristos         deflate_index_free(index);
112*b175d1c2Schristos         return NULL;
113*b175d1c2Schristos     }
114aaf4ece6Schristos     next->out = out;
115*b175d1c2Schristos     next->in = in;
116*b175d1c2Schristos     next->bits = bits;
117aaf4ece6Schristos     if (left)
118aaf4ece6Schristos         memcpy(next->window, window + WINSIZE - left, left);
119aaf4ece6Schristos     if (left < WINSIZE)
120aaf4ece6Schristos         memcpy(next->window + left, window, WINSIZE - left);
121aaf4ece6Schristos 
122*b175d1c2Schristos     // Return the index, which may have been newly allocated or destroyed.
123aaf4ece6Schristos     return index;
124aaf4ece6Schristos }
125aaf4ece6Schristos 
126*b175d1c2Schristos // Decompression modes. These are the inflateInit2() windowBits parameter.
127*b175d1c2Schristos #define RAW -15
128*b175d1c2Schristos #define ZLIB 15
129*b175d1c2Schristos #define GZIP 31
130aaf4ece6Schristos 
131*b175d1c2Schristos // See comments in zran.h.
132*b175d1c2Schristos int deflate_index_build(FILE *in, off_t span, struct deflate_index **built) {
133*b175d1c2Schristos     // Set up inflation state.
134*b175d1c2Schristos     z_stream strm = {0};        // inflate engine (gets fired up later)
135*b175d1c2Schristos     unsigned char buf[CHUNK];   // input buffer
136*b175d1c2Schristos     unsigned char win[WINSIZE] = {0};   // output sliding window
137*b175d1c2Schristos     off_t totin = 0;            // total bytes read from input
138*b175d1c2Schristos     off_t totout = 0;           // total bytes uncompressed
139*b175d1c2Schristos     int mode = 0;               // mode: RAW, ZLIB, or GZIP (0 => not set yet)
140aaf4ece6Schristos 
141*b175d1c2Schristos     // Decompress from in, generating access points along the way.
142*b175d1c2Schristos     int ret;                    // the return value from zlib, or Z_ERRNO
143*b175d1c2Schristos     off_t last;                 // last access point uncompressed offset
144*b175d1c2Schristos     struct deflate_index *index = NULL;     // list of access points
145aaf4ece6Schristos     do {
146*b175d1c2Schristos         // Assure available input, at least until reaching EOF.
147aaf4ece6Schristos         if (strm.avail_in == 0) {
148*b175d1c2Schristos             strm.avail_in = fread(buf, 1, sizeof(buf), in);
149aaf4ece6Schristos             totin += strm.avail_in;
150*b175d1c2Schristos             strm.next_in = buf;
151*b175d1c2Schristos             if (strm.avail_in < sizeof(buf) && ferror(in)) {
152*b175d1c2Schristos                 ret = Z_ERRNO;
153aaf4ece6Schristos                 break;
154ec47cc4bSchristos             }
155aaf4ece6Schristos 
156*b175d1c2Schristos             if (mode == 0) {
157*b175d1c2Schristos                 // At the start of the input -- determine the type. Assume raw
158*b175d1c2Schristos                 // if it is neither zlib nor gzip. This could in theory result
159*b175d1c2Schristos                 // in a false positive for zlib, but in practice the fill bits
160*b175d1c2Schristos                 // after a stored block are always zeros, so a raw stream won't
161*b175d1c2Schristos                 // start with an 8 in the low nybble.
162*b175d1c2Schristos                 mode = strm.avail_in == 0 ? RAW :       // empty -- will fail
163*b175d1c2Schristos                        (strm.next_in[0] & 0xf) == 8 ? ZLIB :
164*b175d1c2Schristos                        strm.next_in[0] == 0x1f ? GZIP :
165*b175d1c2Schristos                        /* else */ RAW;
166*b175d1c2Schristos                 ret = inflateInit2(&strm, mode);
167*b175d1c2Schristos                 if (ret != Z_OK)
168*b175d1c2Schristos                     break;
169*b175d1c2Schristos             }
170*b175d1c2Schristos         }
171*b175d1c2Schristos 
172*b175d1c2Schristos         // Assure available output. This rotates the output through, for use as
173*b175d1c2Schristos         // a sliding window on the uncompressed data.
174*b175d1c2Schristos         if (strm.avail_out == 0) {
175*b175d1c2Schristos             strm.avail_out = sizeof(win);
176*b175d1c2Schristos             strm.next_out = win;
177*b175d1c2Schristos         }
178*b175d1c2Schristos 
179*b175d1c2Schristos         if (mode == RAW && index == NULL)
180*b175d1c2Schristos             // We skip the inflate() call at the start of raw deflate data in
181*b175d1c2Schristos             // order generate an access point there. Set data_type to imitate
182*b175d1c2Schristos             // the end of a header.
183*b175d1c2Schristos             strm.data_type = 0x80;
184*b175d1c2Schristos         else {
185*b175d1c2Schristos             // Inflate and update the number of uncompressed bytes.
186*b175d1c2Schristos             unsigned before = strm.avail_out;
187*b175d1c2Schristos             ret = inflate(&strm, Z_BLOCK);
188*b175d1c2Schristos             totout += before - strm.avail_out;
189*b175d1c2Schristos         }
190*b175d1c2Schristos 
191*b175d1c2Schristos         if ((strm.data_type & 0xc0) == 0x80 &&
192*b175d1c2Schristos             (index == NULL || totout - last >= span)) {
193*b175d1c2Schristos             // We are at the end of a header or a non-last deflate block, so we
194*b175d1c2Schristos             // can add an access point here. Furthermore, we are either at the
195*b175d1c2Schristos             // very start for the first access point, or there has been span or
196*b175d1c2Schristos             // more uncompressed bytes since the last access point, so we want
197*b175d1c2Schristos             // to add an access point here.
198*b175d1c2Schristos             index = add_point(index, strm.data_type & 7, totin - strm.avail_in,
199*b175d1c2Schristos                               totout, strm.avail_out, win);
200aaf4ece6Schristos             if (index == NULL) {
201aaf4ece6Schristos                 ret = Z_MEM_ERROR;
202*b175d1c2Schristos                 break;
203aaf4ece6Schristos             }
204aaf4ece6Schristos             last = totout;
205aaf4ece6Schristos         }
206aaf4ece6Schristos 
207*b175d1c2Schristos         if (ret == Z_STREAM_END && mode == GZIP &&
208*b175d1c2Schristos             (strm.avail_in || ungetc(getc(in), in) != EOF))
209*b175d1c2Schristos             // There is more input after the end of a gzip member. Reset the
210*b175d1c2Schristos             // inflate state to read another gzip member. On success, this will
211*b175d1c2Schristos             // set ret to Z_OK to continue decompressing.
212*b175d1c2Schristos             ret = inflateReset2(&strm, GZIP);
213*b175d1c2Schristos 
214*b175d1c2Schristos         // Keep going until Z_STREAM_END or error. If the compressed data ends
215*b175d1c2Schristos         // prematurely without a file read error, Z_BUF_ERROR is returned.
216*b175d1c2Schristos     } while (ret == Z_OK);
217*b175d1c2Schristos     inflateEnd(&strm);
218*b175d1c2Schristos 
219*b175d1c2Schristos     if (ret != Z_STREAM_END) {
220*b175d1c2Schristos         // An error was encountered. Discard the index and return a negative
221*b175d1c2Schristos         // error code.
222*b175d1c2Schristos         deflate_index_free(index);
223*b175d1c2Schristos         return ret == Z_NEED_DICT ? Z_DATA_ERROR : ret;
224*b175d1c2Schristos     }
225*b175d1c2Schristos 
226*b175d1c2Schristos     // Shrink the index to only the occupied access points and return it.
227*b175d1c2Schristos     index->mode = mode;
228ec47cc4bSchristos     index->length = totout;
229*b175d1c2Schristos     point_t *list = realloc(index->list, sizeof(point_t) * index->have);
230*b175d1c2Schristos     if (list == NULL) {
231*b175d1c2Schristos         // Seems like a realloc() to make something smaller should always work,
232*b175d1c2Schristos         // but just in case.
233*b175d1c2Schristos         deflate_index_free(index);
234*b175d1c2Schristos         return Z_MEM_ERROR;
235*b175d1c2Schristos     }
236*b175d1c2Schristos     index->list = list;
237aaf4ece6Schristos     *built = index;
238ec47cc4bSchristos     return index->have;
239aaf4ece6Schristos }
240aaf4ece6Schristos 
241*b175d1c2Schristos #ifdef NOPRIME
242*b175d1c2Schristos // Support zlib versions before 1.2.3 (July 2005), or incomplete zlib clones
243*b175d1c2Schristos // that do not have inflatePrime().
244aaf4ece6Schristos 
245*b175d1c2Schristos #  define INFLATEPRIME inflatePreface
246*b175d1c2Schristos 
247*b175d1c2Schristos // Append the low bits bits of value to in[] at bit position *have, updating
248*b175d1c2Schristos // *have. value must be zero above its low bits bits. bits must be positive.
249*b175d1c2Schristos // This assumes that any bits above the *have bits in the last byte are zeros.
250*b175d1c2Schristos // That assumption is preserved on return, as any bits above *have + bits in
251*b175d1c2Schristos // the last byte written will be set to zeros.
252*b175d1c2Schristos static inline void append_bits(unsigned value, int bits,
253*b175d1c2Schristos                                unsigned char *in, int *have) {
254*b175d1c2Schristos     in += *have >> 3;           // where the first bits from value will go
255*b175d1c2Schristos     int k = *have & 7;          // the number of bits already there
256*b175d1c2Schristos     *have += bits;
257*b175d1c2Schristos     if (k)
258*b175d1c2Schristos         *in |= value << k;      // write value above the low k bits
259*b175d1c2Schristos     else
260*b175d1c2Schristos         *in = value;
261*b175d1c2Schristos     k = 8 - k;                  // the number of bits just appended
262*b175d1c2Schristos     while (bits > k) {
263*b175d1c2Schristos         value >>= k;            // drop the bits appended
264*b175d1c2Schristos         bits -= k;
265*b175d1c2Schristos         k = 8;                  // now at a byte boundary
266*b175d1c2Schristos         *++in = value;
267*b175d1c2Schristos     }
268*b175d1c2Schristos }
269*b175d1c2Schristos 
270*b175d1c2Schristos // Insert enough bits in the form of empty deflate blocks in front of the
271*b175d1c2Schristos // low bits bits of value, in order to bring the sequence to a byte boundary.
272*b175d1c2Schristos // Then feed that to inflate(). This does what inflatePrime() does, except that
273*b175d1c2Schristos // a negative value of bits is not supported. bits must be in 0..16. If the
274*b175d1c2Schristos // arguments are invalid, Z_STREAM_ERROR is returned. Otherwise the return
275*b175d1c2Schristos // value from inflate() is returned.
276*b175d1c2Schristos static int inflatePreface(z_stream *strm, int bits, int value) {
277*b175d1c2Schristos     // Check input.
278*b175d1c2Schristos     if (strm == Z_NULL || bits < 0 || bits > 16)
279*b175d1c2Schristos         return Z_STREAM_ERROR;
280*b175d1c2Schristos     if (bits == 0)
281*b175d1c2Schristos         return Z_OK;
282*b175d1c2Schristos     value &= (2 << (bits - 1)) - 1;
283*b175d1c2Schristos 
284*b175d1c2Schristos     // An empty dynamic block with an odd number of bits (95). The high bit of
285*b175d1c2Schristos     // the last byte is unused.
286*b175d1c2Schristos     static const unsigned char dyn[] = {
287*b175d1c2Schristos         4, 0xe0, 0x81, 8, 0, 0, 0, 0, 0x20, 0xa8, 0xab, 0x1f
288*b175d1c2Schristos     };
289*b175d1c2Schristos     const int dynlen = 95;          // number of bits in the block
290*b175d1c2Schristos 
291*b175d1c2Schristos     // Build an input buffer for inflate that is a multiple of eight bits in
292*b175d1c2Schristos     // length, and that ends with the low bits bits of value.
293*b175d1c2Schristos     unsigned char in[(dynlen + 3 * 10 + 16 + 7) / 8];
294*b175d1c2Schristos     int have = 0;
295*b175d1c2Schristos     if (bits & 1) {
296*b175d1c2Schristos         // Insert an empty dynamic block to get to an odd number of bits, so
297*b175d1c2Schristos         // when bits bits from value are appended, we are at an even number of
298*b175d1c2Schristos         // bits.
299*b175d1c2Schristos         memcpy(in, dyn, sizeof(dyn));
300*b175d1c2Schristos         have = dynlen;
301*b175d1c2Schristos     }
302*b175d1c2Schristos     while ((have + bits) & 7)
303*b175d1c2Schristos         // Insert empty fixed blocks until appending bits bits would put us on
304*b175d1c2Schristos         // a byte boundary. This will insert at most three fixed blocks.
305*b175d1c2Schristos         append_bits(2, 10, in, &have);
306*b175d1c2Schristos 
307*b175d1c2Schristos     // Append the bits bits from value, which takes us to a byte boundary.
308*b175d1c2Schristos     append_bits(value, bits, in, &have);
309*b175d1c2Schristos 
310*b175d1c2Schristos     // Deliver the input to inflate(). There is no output space provided, but
311*b175d1c2Schristos     // inflate() can't get stuck waiting on output not ingesting all of the
312*b175d1c2Schristos     // provided input. The reason is that there will be at most 16 bits of
313*b175d1c2Schristos     // input from value after the empty deflate blocks (which themselves
314*b175d1c2Schristos     // generate no output). At least ten bits are needed to generate the first
315*b175d1c2Schristos     // output byte from a fixed block. The last two bytes of the buffer have to
316*b175d1c2Schristos     // be ingested in order to get ten bits, which is the most that value can
317*b175d1c2Schristos     // occupy.
318*b175d1c2Schristos     strm->avail_in = have >> 3;
319*b175d1c2Schristos     strm->next_in = in;
320*b175d1c2Schristos     strm->avail_out = 0;
321*b175d1c2Schristos     strm->next_out = in;                // not used, but can't be NULL
322*b175d1c2Schristos     return inflate(strm, Z_NO_FLUSH);
323*b175d1c2Schristos }
324*b175d1c2Schristos 
325*b175d1c2Schristos #else
326*b175d1c2Schristos #  define INFLATEPRIME inflatePrime
327*b175d1c2Schristos #endif
328*b175d1c2Schristos 
329*b175d1c2Schristos // See comments in zran.h.
330*b175d1c2Schristos ptrdiff_t deflate_index_extract(FILE *in, struct deflate_index *index,
331*b175d1c2Schristos                                 off_t offset, unsigned char *buf, size_t len) {
332*b175d1c2Schristos     // Do a quick sanity check on the index.
333*b175d1c2Schristos     if (index == NULL || index->have < 1 || index->list[0].out != 0)
334*b175d1c2Schristos         return Z_STREAM_ERROR;
335*b175d1c2Schristos 
336*b175d1c2Schristos     // If nothing to extract, return zero bytes extracted.
337*b175d1c2Schristos     if (len == 0 || offset < 0 || offset >= index->length)
338aaf4ece6Schristos         return 0;
339aaf4ece6Schristos 
340*b175d1c2Schristos     // Find the access point closest to but not after offset.
341*b175d1c2Schristos     int lo = -1, hi = index->have;
342*b175d1c2Schristos     point_t *point = index->list;
343*b175d1c2Schristos     while (hi - lo > 1) {
344*b175d1c2Schristos         int mid = (lo + hi) >> 1;
345*b175d1c2Schristos         if (offset < point[mid].out)
346*b175d1c2Schristos             hi = mid;
347*b175d1c2Schristos         else
348*b175d1c2Schristos             lo = mid;
349*b175d1c2Schristos     }
350*b175d1c2Schristos     point += lo;
351aaf4ece6Schristos 
352*b175d1c2Schristos     // Initialize the input file and prime the inflate engine to start there.
353*b175d1c2Schristos     int ret = fseeko(in, point->in - (point->bits ? 1 : 0), SEEK_SET);
354*b175d1c2Schristos     if (ret == -1)
355*b175d1c2Schristos         return Z_ERRNO;
356*b175d1c2Schristos     int ch = 0;
357*b175d1c2Schristos     if (point->bits && (ch = getc(in)) == EOF)
358*b175d1c2Schristos         return ferror(in) ? Z_ERRNO : Z_BUF_ERROR;
359*b175d1c2Schristos     z_stream strm = {0};
360*b175d1c2Schristos     ret = inflateInit2(&strm, RAW);
361aaf4ece6Schristos     if (ret != Z_OK)
362aaf4ece6Schristos         return ret;
363*b175d1c2Schristos     if (point->bits)
364*b175d1c2Schristos         INFLATEPRIME(&strm, point->bits, ch >> (8 - point->bits));
365*b175d1c2Schristos     inflateSetDictionary(&strm, point->window, WINSIZE);
366aaf4ece6Schristos 
367*b175d1c2Schristos     // Skip uncompressed bytes until offset reached, then satisfy request.
368*b175d1c2Schristos     unsigned char input[CHUNK];
369*b175d1c2Schristos     unsigned char discard[WINSIZE];
370*b175d1c2Schristos     offset -= point->out;       // number of bytes to skip to get to offset
371*b175d1c2Schristos     size_t left = len;          // number of bytes left to read after offset
372aaf4ece6Schristos     do {
373*b175d1c2Schristos         if (offset) {
374*b175d1c2Schristos             // Discard up to offset uncompressed bytes.
375*b175d1c2Schristos             strm.avail_out = offset < WINSIZE ? (unsigned)offset : WINSIZE;
376aaf4ece6Schristos             strm.next_out = discard;
377ec47cc4bSchristos         }
378ec47cc4bSchristos         else {
379*b175d1c2Schristos             // Uncompress up to left bytes into buf.
380*b175d1c2Schristos             strm.avail_out = left < UINT_MAX ? (unsigned)left : UINT_MAX;
381*b175d1c2Schristos             strm.next_out = buf + len - left;
382ec47cc4bSchristos         }
383ec47cc4bSchristos 
384*b175d1c2Schristos         // Uncompress, setting got to the number of bytes uncompressed.
385ec47cc4bSchristos         if (strm.avail_in == 0) {
386*b175d1c2Schristos             // Assure available input.
387ec47cc4bSchristos             strm.avail_in = fread(input, 1, CHUNK, in);
388*b175d1c2Schristos             if (strm.avail_in < CHUNK && ferror(in)) {
389ec47cc4bSchristos                 ret = Z_ERRNO;
390*b175d1c2Schristos                 break;
391ec47cc4bSchristos             }
392ec47cc4bSchristos             strm.next_in = input;
393ec47cc4bSchristos         }
394*b175d1c2Schristos         unsigned got = strm.avail_out;
395*b175d1c2Schristos         ret = inflate(&strm, Z_NO_FLUSH);
396*b175d1c2Schristos         got -= strm.avail_out;
397ec47cc4bSchristos 
398*b175d1c2Schristos         // Update the appropriate count.
399*b175d1c2Schristos         if (offset)
400*b175d1c2Schristos             offset -= got;
401*b175d1c2Schristos         else
402*b175d1c2Schristos             left -= got;
403*b175d1c2Schristos 
404*b175d1c2Schristos         // If we're at the end of a gzip member and there's more to read,
405*b175d1c2Schristos         // continue to the next gzip member.
406*b175d1c2Schristos         if (ret == Z_STREAM_END && index->mode == GZIP) {
407*b175d1c2Schristos             // Discard the gzip trailer.
408*b175d1c2Schristos             unsigned drop = 8;              // length of gzip trailer
409*b175d1c2Schristos             if (strm.avail_in >= drop) {
410*b175d1c2Schristos                 strm.avail_in -= drop;
411*b175d1c2Schristos                 strm.next_in += drop;
412*b175d1c2Schristos             }
413*b175d1c2Schristos             else {
414*b175d1c2Schristos                 // Read and discard the remainder of the gzip trailer.
415*b175d1c2Schristos                 drop -= strm.avail_in;
416*b175d1c2Schristos                 strm.avail_in = 0;
417*b175d1c2Schristos                 do {
418*b175d1c2Schristos                     if (getc(in) == EOF)
419*b175d1c2Schristos                         // The input does not have a complete trailer.
420*b175d1c2Schristos                         return ferror(in) ? Z_ERRNO : Z_BUF_ERROR;
421*b175d1c2Schristos                 } while (--drop);
422ec47cc4bSchristos             }
423ec47cc4bSchristos 
424*b175d1c2Schristos             if (strm.avail_in || ungetc(getc(in), in) != EOF) {
425*b175d1c2Schristos                 // There's more after the gzip trailer. Use inflate to skip the
426*b175d1c2Schristos                 // gzip header and resume the raw inflate there.
427*b175d1c2Schristos                 inflateReset2(&strm, GZIP);
428*b175d1c2Schristos                 do {
429*b175d1c2Schristos                     if (strm.avail_in == 0) {
430*b175d1c2Schristos                         strm.avail_in = fread(input, 1, CHUNK, in);
431*b175d1c2Schristos                         if (strm.avail_in < CHUNK && ferror(in)) {
432*b175d1c2Schristos                             ret = Z_ERRNO;
433aaf4ece6Schristos                             break;
434*b175d1c2Schristos                         }
435*b175d1c2Schristos                         strm.next_in = input;
436*b175d1c2Schristos                     }
437*b175d1c2Schristos                     strm.avail_out = WINSIZE;
438*b175d1c2Schristos                     strm.next_out = discard;
439*b175d1c2Schristos                     ret = inflate(&strm, Z_BLOCK);  // stop at end of header
440*b175d1c2Schristos                 } while (ret == Z_OK && (strm.data_type & 0x80) == 0);
441*b175d1c2Schristos                 if (ret != Z_OK)
442*b175d1c2Schristos                     break;
443*b175d1c2Schristos                 inflateReset2(&strm, RAW);
444*b175d1c2Schristos             }
445*b175d1c2Schristos         }
446aaf4ece6Schristos 
447*b175d1c2Schristos         // Continue until we have the requested data, the deflate data has
448*b175d1c2Schristos         // ended, or an error is encountered.
449*b175d1c2Schristos     } while (ret == Z_OK && left);
450*b175d1c2Schristos     inflateEnd(&strm);
451aaf4ece6Schristos 
452*b175d1c2Schristos     // Return the number of uncompressed bytes read into buf, or the error.
453*b175d1c2Schristos     return ret == Z_OK || ret == Z_STREAM_END ? len - left : ret;
454aaf4ece6Schristos }
455aaf4ece6Schristos 
456ec47cc4bSchristos #ifdef TEST
457ec47cc4bSchristos 
458*b175d1c2Schristos #define SPAN 1048576L       // desired distance between access points
459*b175d1c2Schristos #define LEN 16384           // number of bytes to extract
460ec47cc4bSchristos 
461*b175d1c2Schristos // Demonstrate the use of deflate_index_build() and deflate_index_extract() by
462*b175d1c2Schristos // processing the file provided on the command line, and extracting LEN bytes
463*b175d1c2Schristos // from 2/3rds of the way through the uncompressed output, writing that to
464*b175d1c2Schristos // stdout. An offset can be provided as the second argument, in which case the
465*b175d1c2Schristos // data is extracted from there instead.
466*b175d1c2Schristos int main(int argc, char **argv) {
467*b175d1c2Schristos     // Open the input file.
468ec47cc4bSchristos     if (argc < 2 || argc > 3) {
469*b175d1c2Schristos         fprintf(stderr, "usage: zran file.raw [offset]\n");
470aaf4ece6Schristos         return 1;
471aaf4ece6Schristos     }
472*b175d1c2Schristos     FILE *in = fopen(argv[1], "rb");
473aaf4ece6Schristos     if (in == NULL) {
474aaf4ece6Schristos         fprintf(stderr, "zran: could not open %s for reading\n", argv[1]);
475aaf4ece6Schristos         return 1;
476aaf4ece6Schristos     }
477aaf4ece6Schristos 
478*b175d1c2Schristos     // Get optional offset.
479*b175d1c2Schristos     off_t offset = -1;
480ec47cc4bSchristos     if (argc == 3) {
481ec47cc4bSchristos         char *end;
482ec47cc4bSchristos         offset = strtoll(argv[2], &end, 10);
483ec47cc4bSchristos         if (*end || offset < 0) {
484ec47cc4bSchristos             fprintf(stderr, "zran: %s is not a valid offset\n", argv[2]);
485ec47cc4bSchristos             return 1;
486ec47cc4bSchristos         }
487ec47cc4bSchristos     }
488ec47cc4bSchristos 
489*b175d1c2Schristos     // Build index.
490*b175d1c2Schristos     struct deflate_index *index = NULL;
491*b175d1c2Schristos     int len = deflate_index_build(in, SPAN, &index);
492aaf4ece6Schristos     if (len < 0) {
493aaf4ece6Schristos         fclose(in);
494aaf4ece6Schristos         switch (len) {
495aaf4ece6Schristos         case Z_MEM_ERROR:
496aaf4ece6Schristos             fprintf(stderr, "zran: out of memory\n");
497aaf4ece6Schristos             break;
498*b175d1c2Schristos         case Z_BUF_ERROR:
499*b175d1c2Schristos             fprintf(stderr, "zran: %s ended prematurely\n", argv[1]);
500*b175d1c2Schristos             break;
501aaf4ece6Schristos         case Z_DATA_ERROR:
502aaf4ece6Schristos             fprintf(stderr, "zran: compressed data error in %s\n", argv[1]);
503aaf4ece6Schristos             break;
504aaf4ece6Schristos         case Z_ERRNO:
505aaf4ece6Schristos             fprintf(stderr, "zran: read error on %s\n", argv[1]);
506aaf4ece6Schristos             break;
507aaf4ece6Schristos         default:
508aaf4ece6Schristos             fprintf(stderr, "zran: error %d while building index\n", len);
509aaf4ece6Schristos         }
510aaf4ece6Schristos         return 1;
511aaf4ece6Schristos     }
512aaf4ece6Schristos     fprintf(stderr, "zran: built index with %d access points\n", len);
513aaf4ece6Schristos 
514*b175d1c2Schristos     // Use index by reading some bytes from an arbitrary offset.
515*b175d1c2Schristos     unsigned char buf[LEN];
516ec47cc4bSchristos     if (offset == -1)
517*b175d1c2Schristos         offset = ((index->length + 1) << 1) / 3;
518*b175d1c2Schristos     ptrdiff_t got = deflate_index_extract(in, index, offset, buf, LEN);
519*b175d1c2Schristos     if (got < 0)
520aaf4ece6Schristos         fprintf(stderr, "zran: extraction failed: %s error\n",
521*b175d1c2Schristos                 got == Z_MEM_ERROR ? "out of memory" : "input corrupted");
522aaf4ece6Schristos     else {
523*b175d1c2Schristos         fwrite(buf, 1, got, stdout);
524*b175d1c2Schristos         fprintf(stderr, "zran: extracted %ld bytes at %lld\n", got, offset);
525aaf4ece6Schristos     }
526aaf4ece6Schristos 
527*b175d1c2Schristos     // Clean up and exit.
528ec47cc4bSchristos     deflate_index_free(index);
529aaf4ece6Schristos     fclose(in);
530aaf4ece6Schristos     return 0;
531aaf4ece6Schristos }
532ec47cc4bSchristos 
533ec47cc4bSchristos #endif
534