xref: /netbsd-src/external/gpl3/gdb/dist/zlib/examples/gzjoin.c (revision ba340e457da88a40806d62ac0f140844ca1436e9)
/* gzjoin -- command to join gzip files into one gzip file

  Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved
  version 1.2, 14 Aug 2012

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the author be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.

  Mark Adler    madler@alumni.caltech.edu
 */
24212397c6Schristos 
/*
 * Change history:
 *
 * 1.0  11 Dec 2004     - First version
 * 1.1  12 Jun 2005     - Changed ssize_t to long for portability
 * 1.2  14 Aug 2012     - Clean up for z_const usage
 */
32212397c6Schristos 
/*
   gzjoin takes one or more gzip files on the command line and writes out a
   single gzip file that will uncompress to the concatenation of the
   uncompressed data from the individual gzip files.  gzjoin does this without
   having to recompress any of the data and without having to calculate a new
   crc32 for the concatenated uncompressed data.  gzjoin does however have to
   decompress all of the input data in order to find the bits in the compressed
   data that need to be modified to concatenate the streams.

   gzjoin does not do an integrity check on the input gzip files other than
   checking the gzip header and decompressing the compressed data.  They are
   otherwise assumed to be complete and correct.

   Each joint between gzip files removes at least 18 bytes of previous trailer
   and subsequent header, and inserts an average of about three bytes to the
   compressed data in order to connect the streams.  The output gzip file
   has a minimal ten-byte gzip header with no file name or modification time.

   This program was written to illustrate the use of the Z_BLOCK option of
   inflate() and the crc32_combine() function.  gzjoin will not compile with
   versions of zlib earlier than 1.2.3.
 */
55212397c6Schristos 
56212397c6Schristos #include <stdio.h>      /* fputs(), fprintf(), fwrite(), putc() */
57212397c6Schristos #include <stdlib.h>     /* exit(), malloc(), free() */
58212397c6Schristos #include <fcntl.h>      /* open() */
59212397c6Schristos #include <unistd.h>     /* close(), read(), lseek() */
60212397c6Schristos #include "zlib.h"
61212397c6Schristos     /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
62212397c6Schristos 
63212397c6Schristos #define local static
64212397c6Schristos 
/* Report a fatal error on stderr and terminate the program with exit status 1.
   The text printed is why1 immediately followed by why2, wrapped in the fixed
   "gzjoin error: ..., output incomplete" message.  The int return type exists
   only so that a call can appear as an operand in an expression (as bget()
   does); control never actually returns to the caller.  Both strings are only
   read, so they are taken as pointers to const. */
local int bail(const char *why1, const char *why2)
{
    fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
    exit(1);
    return 0;               /* not reached -- satisfies the int return type */
}
72212397c6Schristos 
/* -- simple buffered file input with access to the buffer -- */
74212397c6Schristos 
/* size in bytes of each input buffer: must be a power of two and must fit in
   an unsigned */
#define CHUNK 32768

/* state of one buffered input file */
typedef struct {
    char *name;             /* file name, kept for use in error messages */
    int fd;                 /* descriptor of the file being read */
    unsigned left;          /* count of unread bytes available at next */
    unsigned char *next;    /* position of the next unread byte */
    unsigned char *buf;     /* allocated buffer holding CHUNK bytes */
} bin;
85212397c6Schristos 
86212397c6Schristos /* close a buffered file and free allocated memory */
bclose(bin * in)87212397c6Schristos local void bclose(bin *in)
88212397c6Schristos {
89212397c6Schristos     if (in != NULL) {
90212397c6Schristos         if (in->fd != -1)
91212397c6Schristos             close(in->fd);
92212397c6Schristos         if (in->buf != NULL)
93212397c6Schristos             free(in->buf);
94212397c6Schristos         free(in);
95212397c6Schristos     }
96212397c6Schristos }
97212397c6Schristos 
98212397c6Schristos /* open a buffered file for input, return a pointer to type bin, or NULL on
99212397c6Schristos    failure */
bopen(char * name)100212397c6Schristos local bin *bopen(char *name)
101212397c6Schristos {
102212397c6Schristos     bin *in;
103212397c6Schristos 
104212397c6Schristos     in = malloc(sizeof(bin));
105212397c6Schristos     if (in == NULL)
106212397c6Schristos         return NULL;
107212397c6Schristos     in->buf = malloc(CHUNK);
108212397c6Schristos     in->fd = open(name, O_RDONLY, 0);
109212397c6Schristos     if (in->buf == NULL || in->fd == -1) {
110212397c6Schristos         bclose(in);
111212397c6Schristos         return NULL;
112212397c6Schristos     }
113212397c6Schristos     in->left = 0;
114212397c6Schristos     in->next = in->buf;
115212397c6Schristos     in->name = name;
116212397c6Schristos     return in;
117212397c6Schristos }
118212397c6Schristos 
119212397c6Schristos /* load buffer from file, return -1 on read error, 0 or 1 on success, with
120212397c6Schristos    1 indicating that end-of-file was reached */
bload(bin * in)121212397c6Schristos local int bload(bin *in)
122212397c6Schristos {
123212397c6Schristos     long len;
124212397c6Schristos 
125212397c6Schristos     if (in == NULL)
126212397c6Schristos         return -1;
127212397c6Schristos     if (in->left != 0)
128212397c6Schristos         return 0;
129212397c6Schristos     in->next = in->buf;
130212397c6Schristos     do {
131212397c6Schristos         len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
132212397c6Schristos         if (len < 0)
133212397c6Schristos             return -1;
134212397c6Schristos         in->left += (unsigned)len;
135212397c6Schristos     } while (len != 0 && in->left < CHUNK);
136212397c6Schristos     return len == 0 ? 1 : 0;
137212397c6Schristos }
138212397c6Schristos 
139212397c6Schristos /* get a byte from the file, bail if end of file */
140212397c6Schristos #define bget(in) (in->left ? 0 : bload(in), \
141212397c6Schristos                   in->left ? (in->left--, *(in->next)++) : \
142212397c6Schristos                     bail("unexpected end of file on ", in->name))
143212397c6Schristos 
144212397c6Schristos /* get a four-byte little-endian unsigned integer from file */
bget4(bin * in)145212397c6Schristos local unsigned long bget4(bin *in)
146212397c6Schristos {
147212397c6Schristos     unsigned long val;
148212397c6Schristos 
149212397c6Schristos     val = bget(in);
150212397c6Schristos     val += (unsigned long)(bget(in)) << 8;
151212397c6Schristos     val += (unsigned long)(bget(in)) << 16;
152212397c6Schristos     val += (unsigned long)(bget(in)) << 24;
153212397c6Schristos     return val;
154212397c6Schristos }
155212397c6Schristos 
156212397c6Schristos /* skip bytes in file */
bskip(bin * in,unsigned skip)157212397c6Schristos local void bskip(bin *in, unsigned skip)
158212397c6Schristos {
159212397c6Schristos     /* check pointer */
160212397c6Schristos     if (in == NULL)
161212397c6Schristos         return;
162212397c6Schristos 
163212397c6Schristos     /* easy case -- skip bytes in buffer */
164212397c6Schristos     if (skip <= in->left) {
165212397c6Schristos         in->left -= skip;
166212397c6Schristos         in->next += skip;
167212397c6Schristos         return;
168212397c6Schristos     }
169212397c6Schristos 
170212397c6Schristos     /* skip what's in buffer, discard buffer contents */
171212397c6Schristos     skip -= in->left;
172212397c6Schristos     in->left = 0;
173212397c6Schristos 
174212397c6Schristos     /* seek past multiples of CHUNK bytes */
175212397c6Schristos     if (skip > CHUNK) {
176212397c6Schristos         unsigned left;
177212397c6Schristos 
178212397c6Schristos         left = skip & (CHUNK - 1);
179212397c6Schristos         if (left == 0) {
180212397c6Schristos             /* exact number of chunks: seek all the way minus one byte to check
181212397c6Schristos                for end-of-file with a read */
182212397c6Schristos             lseek(in->fd, skip - 1, SEEK_CUR);
183212397c6Schristos             if (read(in->fd, in->buf, 1) != 1)
184212397c6Schristos                 bail("unexpected end of file on ", in->name);
185212397c6Schristos             return;
186212397c6Schristos         }
187212397c6Schristos 
188212397c6Schristos         /* skip the integral chunks, update skip with remainder */
189212397c6Schristos         lseek(in->fd, skip - left, SEEK_CUR);
190212397c6Schristos         skip = left;
191212397c6Schristos     }
192212397c6Schristos 
193212397c6Schristos     /* read more input and skip remainder */
194212397c6Schristos     bload(in);
195212397c6Schristos     if (skip > in->left)
196212397c6Schristos         bail("unexpected end of file on ", in->name);
197212397c6Schristos     in->left -= skip;
198212397c6Schristos     in->next += skip;
199212397c6Schristos }
200212397c6Schristos 
/* -- end of buffered input functions -- */
202212397c6Schristos 
203212397c6Schristos /* skip the gzip header from file in */
gzhead(bin * in)204212397c6Schristos local void gzhead(bin *in)
205212397c6Schristos {
206212397c6Schristos     int flags;
207212397c6Schristos 
208212397c6Schristos     /* verify gzip magic header and compression method */
209212397c6Schristos     if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
210212397c6Schristos         bail(in->name, " is not a valid gzip file");
211212397c6Schristos 
212212397c6Schristos     /* get and verify flags */
213212397c6Schristos     flags = bget(in);
214212397c6Schristos     if ((flags & 0xe0) != 0)
215212397c6Schristos         bail("unknown reserved bits set in ", in->name);
216212397c6Schristos 
217212397c6Schristos     /* skip modification time, extra flags, and os */
218212397c6Schristos     bskip(in, 6);
219212397c6Schristos 
220212397c6Schristos     /* skip extra field if present */
221212397c6Schristos     if (flags & 4) {
222212397c6Schristos         unsigned len;
223212397c6Schristos 
224212397c6Schristos         len = bget(in);
225212397c6Schristos         len += (unsigned)(bget(in)) << 8;
226212397c6Schristos         bskip(in, len);
227212397c6Schristos     }
228212397c6Schristos 
229212397c6Schristos     /* skip file name if present */
230212397c6Schristos     if (flags & 8)
231212397c6Schristos         while (bget(in) != 0)
232212397c6Schristos             ;
233212397c6Schristos 
234212397c6Schristos     /* skip comment if present */
235212397c6Schristos     if (flags & 16)
236212397c6Schristos         while (bget(in) != 0)
237212397c6Schristos             ;
238212397c6Schristos 
239212397c6Schristos     /* skip header crc if present */
240212397c6Schristos     if (flags & 2)
241212397c6Schristos         bskip(in, 2);
242212397c6Schristos }
243212397c6Schristos 
/* Write the low 32 bits of val to out as four bytes, least significant byte
   first. */
local void put4(unsigned long val, FILE *out)
{
    int n;

    for (n = 0; n < 4; n++) {
        putc((int)(val & 0xff), out);
        val >>= 8;
    }
}
252212397c6Schristos 
253212397c6Schristos /* Load up zlib stream from buffered input, bail if end of file */
zpull(z_streamp strm,bin * in)254212397c6Schristos local void zpull(z_streamp strm, bin *in)
255212397c6Schristos {
256212397c6Schristos     if (in->left == 0)
257212397c6Schristos         bload(in);
258212397c6Schristos     if (in->left == 0)
259212397c6Schristos         bail("unexpected end of file on ", in->name);
260212397c6Schristos     strm->avail_in = in->left;
261212397c6Schristos     strm->next_in = in->next;
262212397c6Schristos }
263212397c6Schristos 
264212397c6Schristos /* Write header for gzip file to out and initialize trailer. */
gzinit(unsigned long * crc,unsigned long * tot,FILE * out)265212397c6Schristos local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
266212397c6Schristos {
267212397c6Schristos     fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
268212397c6Schristos     *crc = crc32(0L, Z_NULL, 0);
269212397c6Schristos     *tot = 0;
270212397c6Schristos }
271212397c6Schristos 
272212397c6Schristos /* Copy the compressed data from name, zeroing the last block bit of the last
273212397c6Schristos    block if clr is true, and adding empty blocks as needed to get to a byte
274212397c6Schristos    boundary.  If clr is false, then the last block becomes the last block of
275212397c6Schristos    the output, and the gzip trailer is written.  crc and tot maintains the
276212397c6Schristos    crc and length (modulo 2^32) of the output for the trailer.  The resulting
277212397c6Schristos    gzip file is written to out.  gzinit() must be called before the first call
278212397c6Schristos    of gzcopy() to write the gzip header and to initialize crc and tot. */
gzcopy(char * name,int clr,unsigned long * crc,unsigned long * tot,FILE * out)279212397c6Schristos local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
280212397c6Schristos                   FILE *out)
281212397c6Schristos {
282212397c6Schristos     int ret;                /* return value from zlib functions */
283212397c6Schristos     int pos;                /* where the "last block" bit is in byte */
284212397c6Schristos     int last;               /* true if processing the last block */
285212397c6Schristos     bin *in;                /* buffered input file */
286212397c6Schristos     unsigned char *start;   /* start of compressed data in buffer */
287212397c6Schristos     unsigned char *junk;    /* buffer for uncompressed data -- discarded */
288212397c6Schristos     z_off_t len;            /* length of uncompressed data (support > 4 GB) */
289212397c6Schristos     z_stream strm;          /* zlib inflate stream */
290212397c6Schristos 
291212397c6Schristos     /* open gzip file and skip header */
292212397c6Schristos     in = bopen(name);
293212397c6Schristos     if (in == NULL)
294212397c6Schristos         bail("could not open ", name);
295212397c6Schristos     gzhead(in);
296212397c6Schristos 
297212397c6Schristos     /* allocate buffer for uncompressed data and initialize raw inflate
298212397c6Schristos        stream */
299212397c6Schristos     junk = malloc(CHUNK);
300212397c6Schristos     strm.zalloc = Z_NULL;
301212397c6Schristos     strm.zfree = Z_NULL;
302212397c6Schristos     strm.opaque = Z_NULL;
303212397c6Schristos     strm.avail_in = 0;
304212397c6Schristos     strm.next_in = Z_NULL;
305212397c6Schristos     ret = inflateInit2(&strm, -15);
306212397c6Schristos     if (junk == NULL || ret != Z_OK)
307212397c6Schristos         bail("out of memory", "");
308212397c6Schristos 
309212397c6Schristos     /* inflate and copy compressed data, clear last-block bit if requested */
310212397c6Schristos     len = 0;
311212397c6Schristos     zpull(&strm, in);
312*ba340e45Schristos     start = in->next;
313212397c6Schristos     last = start[0] & 1;
314212397c6Schristos     if (last && clr)
315212397c6Schristos         start[0] &= ~1;
316212397c6Schristos     strm.avail_out = 0;
317212397c6Schristos     for (;;) {
318212397c6Schristos         /* if input used and output done, write used input and get more */
319212397c6Schristos         if (strm.avail_in == 0 && strm.avail_out != 0) {
320212397c6Schristos             fwrite(start, 1, strm.next_in - start, out);
321212397c6Schristos             start = in->buf;
322212397c6Schristos             in->left = 0;
323212397c6Schristos             zpull(&strm, in);
324212397c6Schristos         }
325212397c6Schristos 
326212397c6Schristos         /* decompress -- return early when end-of-block reached */
327212397c6Schristos         strm.avail_out = CHUNK;
328212397c6Schristos         strm.next_out = junk;
329212397c6Schristos         ret = inflate(&strm, Z_BLOCK);
330212397c6Schristos         switch (ret) {
331212397c6Schristos         case Z_MEM_ERROR:
332212397c6Schristos             bail("out of memory", "");
333212397c6Schristos         case Z_DATA_ERROR:
334212397c6Schristos             bail("invalid compressed data in ", in->name);
335212397c6Schristos         }
336212397c6Schristos 
337212397c6Schristos         /* update length of uncompressed data */
338212397c6Schristos         len += CHUNK - strm.avail_out;
339212397c6Schristos 
340212397c6Schristos         /* check for block boundary (only get this when block copied out) */
341212397c6Schristos         if (strm.data_type & 128) {
342212397c6Schristos             /* if that was the last block, then done */
343212397c6Schristos             if (last)
344212397c6Schristos                 break;
345212397c6Schristos 
346212397c6Schristos             /* number of unused bits in last byte */
347212397c6Schristos             pos = strm.data_type & 7;
348212397c6Schristos 
349212397c6Schristos             /* find the next last-block bit */
350212397c6Schristos             if (pos != 0) {
351212397c6Schristos                 /* next last-block bit is in last used byte */
352212397c6Schristos                 pos = 0x100 >> pos;
353212397c6Schristos                 last = strm.next_in[-1] & pos;
354212397c6Schristos                 if (last && clr)
355*ba340e45Schristos                     in->buf[strm.next_in - in->buf - 1] &= ~pos;
356212397c6Schristos             }
357212397c6Schristos             else {
358212397c6Schristos                 /* next last-block bit is in next unused byte */
359212397c6Schristos                 if (strm.avail_in == 0) {
360212397c6Schristos                     /* don't have that byte yet -- get it */
361212397c6Schristos                     fwrite(start, 1, strm.next_in - start, out);
362212397c6Schristos                     start = in->buf;
363212397c6Schristos                     in->left = 0;
364212397c6Schristos                     zpull(&strm, in);
365212397c6Schristos                 }
366212397c6Schristos                 last = strm.next_in[0] & 1;
367212397c6Schristos                 if (last && clr)
368*ba340e45Schristos                     in->buf[strm.next_in - in->buf] &= ~1;
369212397c6Schristos             }
370212397c6Schristos         }
371212397c6Schristos     }
372212397c6Schristos 
373212397c6Schristos     /* update buffer with unused input */
374212397c6Schristos     in->left = strm.avail_in;
375*ba340e45Schristos     in->next = in->buf + (strm.next_in - in->buf);
376212397c6Schristos 
377212397c6Schristos     /* copy used input, write empty blocks to get to byte boundary */
378212397c6Schristos     pos = strm.data_type & 7;
379212397c6Schristos     fwrite(start, 1, in->next - start - 1, out);
380212397c6Schristos     last = in->next[-1];
381212397c6Schristos     if (pos == 0 || !clr)
382212397c6Schristos         /* already at byte boundary, or last file: write last byte */
383212397c6Schristos         putc(last, out);
384212397c6Schristos     else {
385212397c6Schristos         /* append empty blocks to last byte */
386212397c6Schristos         last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */
387212397c6Schristos         if (pos & 1) {
388212397c6Schristos             /* odd -- append an empty stored block */
389212397c6Schristos             putc(last, out);
390212397c6Schristos             if (pos == 1)
391212397c6Schristos                 putc(0, out);               /* two more bits in block header */
392212397c6Schristos             fwrite("\0\0\xff\xff", 1, 4, out);
393212397c6Schristos         }
394212397c6Schristos         else {
395212397c6Schristos             /* even -- append 1, 2, or 3 empty fixed blocks */
396212397c6Schristos             switch (pos) {
397212397c6Schristos             case 6:
398212397c6Schristos                 putc(last | 8, out);
399212397c6Schristos                 last = 0;
400212397c6Schristos             case 4:
401212397c6Schristos                 putc(last | 0x20, out);
402212397c6Schristos                 last = 0;
403212397c6Schristos             case 2:
404212397c6Schristos                 putc(last | 0x80, out);
405212397c6Schristos                 putc(0, out);
406212397c6Schristos             }
407212397c6Schristos         }
408212397c6Schristos     }
409212397c6Schristos 
410212397c6Schristos     /* update crc and tot */
411212397c6Schristos     *crc = crc32_combine(*crc, bget4(in), len);
412212397c6Schristos     *tot += (unsigned long)len;
413212397c6Schristos 
414212397c6Schristos     /* clean up */
415212397c6Schristos     inflateEnd(&strm);
416212397c6Schristos     free(junk);
417212397c6Schristos     bclose(in);
418212397c6Schristos 
419212397c6Schristos     /* write trailer if this is the last gzip file */
420212397c6Schristos     if (!clr) {
421212397c6Schristos         put4(*crc, out);
422212397c6Schristos         put4(*tot, out);
423212397c6Schristos     }
424212397c6Schristos }
425212397c6Schristos 
/* Join the gzip files named on the command line and write the resulting
   single gzip file to stdout.  With no file arguments a usage message is
   printed on stderr and 0 is returned.  Returns 0 on success; any failure,
   including a failure to write the output, ends the program through bail()
   with exit status 1. */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* show usage if no arguments (argc < 2 also covers an argc of zero, which
       would otherwise go negative below and run the copy loop) */
    if (argc < 2) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* skip command name */
    argc--;
    argv++;

    /* join gzip files on command line and write to stdout -- argc is the
       number of files still to come, so it is zero (clr false) only for the
       last one */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* none of the writes above are checked individually, so make sure that
       everything really reached stdout before reporting success */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("could not write to ", "standard output");

    /* done */
    return 0;
}
450