xref: /netbsd-src/external/gpl3/binutils/dist/zlib/examples/gzjoin.c (revision 8cbf5cb7c47586c462bde6a6b4444605bb6ef4e2)
/* gzjoin -- command to join gzip files into one gzip file

  Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved
  version 1.2, 14 Aug 2012

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the author be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.

  Mark Adler    madler@alumni.caltech.edu
 */

/*
 * Change history:
 *
 * 1.0  11 Dec 2004     - First version
 * 1.1  12 Jun 2005     - Changed ssize_t to long for portability
 * 1.2  14 Aug 2012     - Clean up for z_const usage
 */

/*
   gzjoin takes one or more gzip files on the command line and writes out a
   single gzip file that will uncompress to the concatenation of the
   uncompressed data from the individual gzip files.  gzjoin does this without
   having to recompress any of the data and without having to calculate a new
   crc32 for the concatenated uncompressed data.  gzjoin does however have to
   decompress all of the input data in order to find the bits in the compressed
   data that need to be modified to concatenate the streams.

   gzjoin does not do an integrity check on the input gzip files other than
   checking the gzip header and decompressing the compressed data.  They are
   otherwise assumed to be complete and correct.

   Each joint between gzip files removes at least 18 bytes of previous trailer
   and subsequent header, and inserts an average of about three bytes to the
   compressed data in order to connect the streams.  The output gzip file
   has a minimal ten-byte gzip header with no file name or modification time.

   This program was written to illustrate the use of the Z_BLOCK option of
   inflate() and the crc32_combine() function.  gzjoin will not compile with
   versions of zlib earlier than 1.2.3.
 */

#include <stdio.h>      /* fputs(), fprintf(), fwrite(), putc() */
#include <stdlib.h>     /* exit(), malloc(), free() */
#include <fcntl.h>      /* open() */
#include <unistd.h>     /* close(), read(), lseek() */
#include "zlib.h"
    /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */

#define local static

/* Report a fatal error on stderr and terminate the program with exit status
   1.  The message printed is "gzjoin error: " followed by why1, why2, and
   ", output incomplete".  The int return type exists only so that a call can
   be used as an operand in an expression; the return statement is never
   reached. */
local int bail(char *why1, char *why2)
{
    fputs("gzjoin error: ", stderr);
    fputs(why1, stderr);
    fputs(why2, stderr);
    fputs(", output incomplete\n", stderr);
    exit(1);
    return 0;
}

/* -- simple buffered file input with access to the buffer -- */

/* size of the input buffer in bytes: must be a power of two (bskip() masks
   with CHUNK - 1) and must fit in an unsigned */
#define CHUNK 32768

/* state of one buffered input file */
typedef struct {
    char *name;             /* file name, used only in error messages */
    int fd;                 /* descriptor of the open file, -1 if not open */
    unsigned left;          /* count of unread bytes starting at next */
    unsigned char *next;    /* first unread byte, points into buf */
    unsigned char *buf;     /* heap buffer holding CHUNK bytes */
} bin;

869573673dSchristos /* close a buffered file and free allocated memory */
bclose(bin * in)879573673dSchristos local void bclose(bin *in)
889573673dSchristos {
899573673dSchristos     if (in != NULL) {
909573673dSchristos         if (in->fd != -1)
919573673dSchristos             close(in->fd);
929573673dSchristos         if (in->buf != NULL)
939573673dSchristos             free(in->buf);
949573673dSchristos         free(in);
959573673dSchristos     }
969573673dSchristos }
979573673dSchristos 
989573673dSchristos /* open a buffered file for input, return a pointer to type bin, or NULL on
999573673dSchristos    failure */
bopen(char * name)1009573673dSchristos local bin *bopen(char *name)
1019573673dSchristos {
1029573673dSchristos     bin *in;
1039573673dSchristos 
1049573673dSchristos     in = malloc(sizeof(bin));
1059573673dSchristos     if (in == NULL)
1069573673dSchristos         return NULL;
1079573673dSchristos     in->buf = malloc(CHUNK);
1089573673dSchristos     in->fd = open(name, O_RDONLY, 0);
1099573673dSchristos     if (in->buf == NULL || in->fd == -1) {
1109573673dSchristos         bclose(in);
1119573673dSchristos         return NULL;
1129573673dSchristos     }
1139573673dSchristos     in->left = 0;
1149573673dSchristos     in->next = in->buf;
1159573673dSchristos     in->name = name;
1169573673dSchristos     return in;
1179573673dSchristos }
1189573673dSchristos 
1199573673dSchristos /* load buffer from file, return -1 on read error, 0 or 1 on success, with
1209573673dSchristos    1 indicating that end-of-file was reached */
bload(bin * in)1219573673dSchristos local int bload(bin *in)
1229573673dSchristos {
1239573673dSchristos     long len;
1249573673dSchristos 
1259573673dSchristos     if (in == NULL)
1269573673dSchristos         return -1;
1279573673dSchristos     if (in->left != 0)
1289573673dSchristos         return 0;
1299573673dSchristos     in->next = in->buf;
1309573673dSchristos     do {
1319573673dSchristos         len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
1329573673dSchristos         if (len < 0)
1339573673dSchristos             return -1;
1349573673dSchristos         in->left += (unsigned)len;
1359573673dSchristos     } while (len != 0 && in->left < CHUNK);
1369573673dSchristos     return len == 0 ? 1 : 0;
1379573673dSchristos }
1389573673dSchristos 
/* Get the next byte from the file as an int, refilling the buffer with
   bload() when it is empty.  Calls bail() if no byte is available; the
   result of bload() is not examined, so a read error is reported the same way
   as end of file.  The argument is evaluated more than once and therefore
   must not have side effects; it is parenthesized in the expansion so that
   any pointer-valued expression can be passed. */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                    bail("unexpected end of file on ", (in)->name))

1449573673dSchristos /* get a four-byte little-endian unsigned integer from file */
bget4(bin * in)1459573673dSchristos local unsigned long bget4(bin *in)
1469573673dSchristos {
1479573673dSchristos     unsigned long val;
1489573673dSchristos 
1499573673dSchristos     val = bget(in);
1509573673dSchristos     val += (unsigned long)(bget(in)) << 8;
1519573673dSchristos     val += (unsigned long)(bget(in)) << 16;
1529573673dSchristos     val += (unsigned long)(bget(in)) << 24;
1539573673dSchristos     return val;
1549573673dSchristos }
1559573673dSchristos 
1569573673dSchristos /* skip bytes in file */
bskip(bin * in,unsigned skip)1579573673dSchristos local void bskip(bin *in, unsigned skip)
1589573673dSchristos {
1599573673dSchristos     /* check pointer */
1609573673dSchristos     if (in == NULL)
1619573673dSchristos         return;
1629573673dSchristos 
1639573673dSchristos     /* easy case -- skip bytes in buffer */
1649573673dSchristos     if (skip <= in->left) {
1659573673dSchristos         in->left -= skip;
1669573673dSchristos         in->next += skip;
1679573673dSchristos         return;
1689573673dSchristos     }
1699573673dSchristos 
1709573673dSchristos     /* skip what's in buffer, discard buffer contents */
1719573673dSchristos     skip -= in->left;
1729573673dSchristos     in->left = 0;
1739573673dSchristos 
1749573673dSchristos     /* seek past multiples of CHUNK bytes */
1759573673dSchristos     if (skip > CHUNK) {
1769573673dSchristos         unsigned left;
1779573673dSchristos 
1789573673dSchristos         left = skip & (CHUNK - 1);
1799573673dSchristos         if (left == 0) {
1809573673dSchristos             /* exact number of chunks: seek all the way minus one byte to check
1819573673dSchristos                for end-of-file with a read */
1829573673dSchristos             lseek(in->fd, skip - 1, SEEK_CUR);
1839573673dSchristos             if (read(in->fd, in->buf, 1) != 1)
1849573673dSchristos                 bail("unexpected end of file on ", in->name);
1859573673dSchristos             return;
1869573673dSchristos         }
1879573673dSchristos 
1889573673dSchristos         /* skip the integral chunks, update skip with remainder */
1899573673dSchristos         lseek(in->fd, skip - left, SEEK_CUR);
1909573673dSchristos         skip = left;
1919573673dSchristos     }
1929573673dSchristos 
1939573673dSchristos     /* read more input and skip remainder */
1949573673dSchristos     bload(in);
1959573673dSchristos     if (skip > in->left)
1969573673dSchristos         bail("unexpected end of file on ", in->name);
1979573673dSchristos     in->left -= skip;
1989573673dSchristos     in->next += skip;
1999573673dSchristos }
2009573673dSchristos 
2019573673dSchristos /* -- end of buffered input functions -- */
2029573673dSchristos 
2039573673dSchristos /* skip the gzip header from file in */
gzhead(bin * in)2049573673dSchristos local void gzhead(bin *in)
2059573673dSchristos {
2069573673dSchristos     int flags;
2079573673dSchristos 
2089573673dSchristos     /* verify gzip magic header and compression method */
2099573673dSchristos     if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
2109573673dSchristos         bail(in->name, " is not a valid gzip file");
2119573673dSchristos 
2129573673dSchristos     /* get and verify flags */
2139573673dSchristos     flags = bget(in);
2149573673dSchristos     if ((flags & 0xe0) != 0)
2159573673dSchristos         bail("unknown reserved bits set in ", in->name);
2169573673dSchristos 
2179573673dSchristos     /* skip modification time, extra flags, and os */
2189573673dSchristos     bskip(in, 6);
2199573673dSchristos 
2209573673dSchristos     /* skip extra field if present */
2219573673dSchristos     if (flags & 4) {
2229573673dSchristos         unsigned len;
2239573673dSchristos 
2249573673dSchristos         len = bget(in);
2259573673dSchristos         len += (unsigned)(bget(in)) << 8;
2269573673dSchristos         bskip(in, len);
2279573673dSchristos     }
2289573673dSchristos 
2299573673dSchristos     /* skip file name if present */
2309573673dSchristos     if (flags & 8)
2319573673dSchristos         while (bget(in) != 0)
2329573673dSchristos             ;
2339573673dSchristos 
2349573673dSchristos     /* skip comment if present */
2359573673dSchristos     if (flags & 16)
2369573673dSchristos         while (bget(in) != 0)
2379573673dSchristos             ;
2389573673dSchristos 
2399573673dSchristos     /* skip header crc if present */
2409573673dSchristos     if (flags & 2)
2419573673dSchristos         bskip(in, 2);
2429573673dSchristos }
2439573673dSchristos 
/* Write the low 32 bits of val to out as four bytes, least significant byte
   first. */
local void put4(unsigned long val, FILE *out)
{
    int n;

    for (n = 0; n < 4; n++) {
        putc((int)(val & 0xff), out);
        val >>= 8;
    }
}

2539573673dSchristos /* Load up zlib stream from buffered input, bail if end of file */
zpull(z_streamp strm,bin * in)2549573673dSchristos local void zpull(z_streamp strm, bin *in)
2559573673dSchristos {
2569573673dSchristos     if (in->left == 0)
2579573673dSchristos         bload(in);
2589573673dSchristos     if (in->left == 0)
2599573673dSchristos         bail("unexpected end of file on ", in->name);
2609573673dSchristos     strm->avail_in = in->left;
2619573673dSchristos     strm->next_in = in->next;
2629573673dSchristos }
2639573673dSchristos 
2649573673dSchristos /* Write header for gzip file to out and initialize trailer. */
gzinit(unsigned long * crc,unsigned long * tot,FILE * out)2659573673dSchristos local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
2669573673dSchristos {
2679573673dSchristos     fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
2689573673dSchristos     *crc = crc32(0L, Z_NULL, 0);
2699573673dSchristos     *tot = 0;
2709573673dSchristos }
2719573673dSchristos 
2729573673dSchristos /* Copy the compressed data from name, zeroing the last block bit of the last
2739573673dSchristos    block if clr is true, and adding empty blocks as needed to get to a byte
2749573673dSchristos    boundary.  If clr is false, then the last block becomes the last block of
2759573673dSchristos    the output, and the gzip trailer is written.  crc and tot maintains the
2769573673dSchristos    crc and length (modulo 2^32) of the output for the trailer.  The resulting
2779573673dSchristos    gzip file is written to out.  gzinit() must be called before the first call
2789573673dSchristos    of gzcopy() to write the gzip header and to initialize crc and tot. */
gzcopy(char * name,int clr,unsigned long * crc,unsigned long * tot,FILE * out)2799573673dSchristos local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
2809573673dSchristos                   FILE *out)
2819573673dSchristos {
2829573673dSchristos     int ret;                /* return value from zlib functions */
2839573673dSchristos     int pos;                /* where the "last block" bit is in byte */
2849573673dSchristos     int last;               /* true if processing the last block */
2859573673dSchristos     bin *in;                /* buffered input file */
2869573673dSchristos     unsigned char *start;   /* start of compressed data in buffer */
2879573673dSchristos     unsigned char *junk;    /* buffer for uncompressed data -- discarded */
2889573673dSchristos     z_off_t len;            /* length of uncompressed data (support > 4 GB) */
2899573673dSchristos     z_stream strm;          /* zlib inflate stream */
2909573673dSchristos 
2919573673dSchristos     /* open gzip file and skip header */
2929573673dSchristos     in = bopen(name);
2939573673dSchristos     if (in == NULL)
2949573673dSchristos         bail("could not open ", name);
2959573673dSchristos     gzhead(in);
2969573673dSchristos 
2979573673dSchristos     /* allocate buffer for uncompressed data and initialize raw inflate
2989573673dSchristos        stream */
2999573673dSchristos     junk = malloc(CHUNK);
3009573673dSchristos     strm.zalloc = Z_NULL;
3019573673dSchristos     strm.zfree = Z_NULL;
3029573673dSchristos     strm.opaque = Z_NULL;
3039573673dSchristos     strm.avail_in = 0;
3049573673dSchristos     strm.next_in = Z_NULL;
3059573673dSchristos     ret = inflateInit2(&strm, -15);
3069573673dSchristos     if (junk == NULL || ret != Z_OK)
3079573673dSchristos         bail("out of memory", "");
3089573673dSchristos 
3099573673dSchristos     /* inflate and copy compressed data, clear last-block bit if requested */
3109573673dSchristos     len = 0;
3119573673dSchristos     zpull(&strm, in);
312*8cbf5cb7Schristos     start = in->next;
3139573673dSchristos     last = start[0] & 1;
3149573673dSchristos     if (last && clr)
3159573673dSchristos         start[0] &= ~1;
3169573673dSchristos     strm.avail_out = 0;
3179573673dSchristos     for (;;) {
3189573673dSchristos         /* if input used and output done, write used input and get more */
3199573673dSchristos         if (strm.avail_in == 0 && strm.avail_out != 0) {
3209573673dSchristos             fwrite(start, 1, strm.next_in - start, out);
3219573673dSchristos             start = in->buf;
3229573673dSchristos             in->left = 0;
3239573673dSchristos             zpull(&strm, in);
3249573673dSchristos         }
3259573673dSchristos 
3269573673dSchristos         /* decompress -- return early when end-of-block reached */
3279573673dSchristos         strm.avail_out = CHUNK;
3289573673dSchristos         strm.next_out = junk;
3299573673dSchristos         ret = inflate(&strm, Z_BLOCK);
3309573673dSchristos         switch (ret) {
3319573673dSchristos         case Z_MEM_ERROR:
3329573673dSchristos             bail("out of memory", "");
3339573673dSchristos         case Z_DATA_ERROR:
3349573673dSchristos             bail("invalid compressed data in ", in->name);
3359573673dSchristos         }
3369573673dSchristos 
3379573673dSchristos         /* update length of uncompressed data */
3389573673dSchristos         len += CHUNK - strm.avail_out;
3399573673dSchristos 
3409573673dSchristos         /* check for block boundary (only get this when block copied out) */
3419573673dSchristos         if (strm.data_type & 128) {
3429573673dSchristos             /* if that was the last block, then done */
3439573673dSchristos             if (last)
3449573673dSchristos                 break;
3459573673dSchristos 
3469573673dSchristos             /* number of unused bits in last byte */
3479573673dSchristos             pos = strm.data_type & 7;
3489573673dSchristos 
3499573673dSchristos             /* find the next last-block bit */
3509573673dSchristos             if (pos != 0) {
3519573673dSchristos                 /* next last-block bit is in last used byte */
3529573673dSchristos                 pos = 0x100 >> pos;
3539573673dSchristos                 last = strm.next_in[-1] & pos;
3549573673dSchristos                 if (last && clr)
355*8cbf5cb7Schristos                     in->buf[strm.next_in - in->buf - 1] &= ~pos;
3569573673dSchristos             }
3579573673dSchristos             else {
3589573673dSchristos                 /* next last-block bit is in next unused byte */
3599573673dSchristos                 if (strm.avail_in == 0) {
3609573673dSchristos                     /* don't have that byte yet -- get it */
3619573673dSchristos                     fwrite(start, 1, strm.next_in - start, out);
3629573673dSchristos                     start = in->buf;
3639573673dSchristos                     in->left = 0;
3649573673dSchristos                     zpull(&strm, in);
3659573673dSchristos                 }
3669573673dSchristos                 last = strm.next_in[0] & 1;
3679573673dSchristos                 if (last && clr)
368*8cbf5cb7Schristos                     in->buf[strm.next_in - in->buf] &= ~1;
3699573673dSchristos             }
3709573673dSchristos         }
3719573673dSchristos     }
3729573673dSchristos 
3739573673dSchristos     /* update buffer with unused input */
3749573673dSchristos     in->left = strm.avail_in;
375*8cbf5cb7Schristos     in->next = in->buf + (strm.next_in - in->buf);
3769573673dSchristos 
3779573673dSchristos     /* copy used input, write empty blocks to get to byte boundary */
3789573673dSchristos     pos = strm.data_type & 7;
3799573673dSchristos     fwrite(start, 1, in->next - start - 1, out);
3809573673dSchristos     last = in->next[-1];
3819573673dSchristos     if (pos == 0 || !clr)
3829573673dSchristos         /* already at byte boundary, or last file: write last byte */
3839573673dSchristos         putc(last, out);
3849573673dSchristos     else {
3859573673dSchristos         /* append empty blocks to last byte */
3869573673dSchristos         last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */
3879573673dSchristos         if (pos & 1) {
3889573673dSchristos             /* odd -- append an empty stored block */
3899573673dSchristos             putc(last, out);
3909573673dSchristos             if (pos == 1)
3919573673dSchristos                 putc(0, out);               /* two more bits in block header */
3929573673dSchristos             fwrite("\0\0\xff\xff", 1, 4, out);
3939573673dSchristos         }
3949573673dSchristos         else {
3959573673dSchristos             /* even -- append 1, 2, or 3 empty fixed blocks */
3969573673dSchristos             switch (pos) {
3979573673dSchristos             case 6:
3989573673dSchristos                 putc(last | 8, out);
3999573673dSchristos                 last = 0;
4009573673dSchristos             case 4:
4019573673dSchristos                 putc(last | 0x20, out);
4029573673dSchristos                 last = 0;
4039573673dSchristos             case 2:
4049573673dSchristos                 putc(last | 0x80, out);
4059573673dSchristos                 putc(0, out);
4069573673dSchristos             }
4079573673dSchristos         }
4089573673dSchristos     }
4099573673dSchristos 
4109573673dSchristos     /* update crc and tot */
4119573673dSchristos     *crc = crc32_combine(*crc, bget4(in), len);
4129573673dSchristos     *tot += (unsigned long)len;
4139573673dSchristos 
4149573673dSchristos     /* clean up */
4159573673dSchristos     inflateEnd(&strm);
4169573673dSchristos     free(junk);
4179573673dSchristos     bclose(in);
4189573673dSchristos 
4199573673dSchristos     /* write trailer if this is the last gzip file */
4209573673dSchristos     if (!clr) {
4219573673dSchristos         put4(*crc, out);
4229573673dSchristos         put4(*tot, out);
4239573673dSchristos     }
4249573673dSchristos }
4259573673dSchristos 
/* Join the gzip files named on the command line and write the resulting
   single gzip stream to stdout.  With no arguments, a usage message is
   printed on stderr and the exit status is 0.  Every file but the last is
   copied with its last-block bit cleared (gzcopy() is passed the number of
   files still to come as clr).  Returns 0 on success; any error, including a
   failure to write the output, ends the program through bail() with exit
   status 1. */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* skip command name */
    argc--;
    argv++;

    /* show usage if no arguments */
    if (argc == 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* join gzip files on command line and write to stdout */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* stdout is buffered and none of the writes above are checked, so flush
       now and look at the stream's error indicator: a full disk or closed
       pipe must not be reported as success */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("write error on ", "stdout");

    /* done */
    return 0;
}