1*ec47cc4bSchristos /* gznorm.c -- normalize a gzip stream
2*ec47cc4bSchristos * Copyright (C) 2018 Mark Adler
3*ec47cc4bSchristos * For conditions of distribution and use, see copyright notice in zlib.h
4*ec47cc4bSchristos * Version 1.0 7 Oct 2018 Mark Adler */
5*ec47cc4bSchristos
6*ec47cc4bSchristos // gznorm takes a gzip stream, potentially containing multiple members, and
7*ec47cc4bSchristos // converts it to a gzip stream with a single member. In addition the gzip
8*ec47cc4bSchristos // header is normalized, removing the file name and time stamp, and setting the
9*ec47cc4bSchristos // other header contents (XFL, OS) to fixed values. gznorm does not recompress
10*ec47cc4bSchristos // the data, so it is fast, but no advantage is gained from the history that
11*ec47cc4bSchristos // could be available across member boundaries.
12*ec47cc4bSchristos
13*ec47cc4bSchristos #include <stdio.h> // fread, fwrite, putc, fflush, ferror, fprintf,
14*ec47cc4bSchristos // vsnprintf, stdout, stderr, NULL, FILE
15*ec47cc4bSchristos #include <stdlib.h> // malloc, free
16*ec47cc4bSchristos #include <string.h> // strerror
17*ec47cc4bSchristos #include <errno.h> // errno
18*ec47cc4bSchristos #include <stdarg.h> // va_list, va_start, va_end
19*ec47cc4bSchristos #include "zlib.h" // inflateInit2, inflate, inflateReset, inflateEnd,
20*ec47cc4bSchristos // z_stream, z_off_t, crc32_combine, Z_NULL, Z_BLOCK,
21*ec47cc4bSchristos // Z_OK, Z_STREAM_END, Z_BUF_ERROR, Z_DATA_ERROR,
22*ec47cc4bSchristos // Z_MEM_ERROR
23*ec47cc4bSchristos
24*ec47cc4bSchristos #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
25*ec47cc4bSchristos # include <fcntl.h>
26*ec47cc4bSchristos # include <io.h>
27*ec47cc4bSchristos # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
28*ec47cc4bSchristos #else
29*ec47cc4bSchristos # define SET_BINARY_MODE(file)
30*ec47cc4bSchristos #endif
31*ec47cc4bSchristos
32*ec47cc4bSchristos #define local static
33*ec47cc4bSchristos
// printf to an allocated string. fmt and the following arguments are as for
// printf(). Return the formatted string, which the caller must free(), or NULL
// if either the formatting or the allocation fails.
local char *aprintf(const char *fmt, ...) {
    // Get the length of the result of the printf, not counting the
    // terminating nul.
    va_list args;
    va_start(args, fmt);
    int len = vsnprintf(NULL, 0, fmt, args);
    va_end(args);
    if (len < 0)
        return NULL;

    // Allocate the required space. The size is computed as a size_t so that
    // adding one for the terminator cannot overflow an int.
    size_t size = (size_t)len + 1;
    char *str = malloc(size);
    if (str == NULL)
        return NULL;

    // printf to the allocated space. The argument list has to be restarted,
    // since the first vsnprintf() consumed it.
    va_start(args, fmt);
    int ret = vsnprintf(str, size, fmt, args);
    va_end(args);
    if (ret < 0 || (size_t)ret >= size) {
        // The second pass failed or disagreed with the first -- don't return
        // a string with unspecified or truncated contents.
        free(str);
        return NULL;
    }
    return str;
}
54*ec47cc4bSchristos
55*ec47cc4bSchristos // Return with an error, putting an allocated error message in *err. Doing an
56*ec47cc4bSchristos // inflateEnd() on an already ended state, or one with state set to Z_NULL, is
57*ec47cc4bSchristos // permitted.
58*ec47cc4bSchristos #define BYE(...) \
59*ec47cc4bSchristos do { \
60*ec47cc4bSchristos inflateEnd(&strm); \
61*ec47cc4bSchristos *err = aprintf(__VA_ARGS__); \
62*ec47cc4bSchristos return 1; \
63*ec47cc4bSchristos } while (0)
64*ec47cc4bSchristos
65*ec47cc4bSchristos // Chunk size for buffered reads and for decompression. Twice this many bytes
66*ec47cc4bSchristos // will be allocated on the stack by gzip_normalize(). Must fit in an unsigned.
67*ec47cc4bSchristos #define CHUNK 16384
68*ec47cc4bSchristos
69*ec47cc4bSchristos // Read a gzip stream from in and write an equivalent normalized gzip stream to
70*ec47cc4bSchristos // out. If given no input, an empty gzip stream will be written. If successful,
71*ec47cc4bSchristos // 0 is returned, and *err is set to NULL. On error, 1 is returned, where the
72*ec47cc4bSchristos // details of the error are returned in *err, a pointer to an allocated string.
73*ec47cc4bSchristos //
74*ec47cc4bSchristos // The input may be a stream with multiple gzip members, which is converted to
75*ec47cc4bSchristos // a single gzip member on the output. Each gzip member is decompressed at the
76*ec47cc4bSchristos // level of deflate blocks. This enables clearing the last-block bit, shifting
77*ec47cc4bSchristos // the compressed data to concatenate to the previous member's compressed data,
78*ec47cc4bSchristos // which can end at an arbitrary bit boundary, and identifying stored blocks in
79*ec47cc4bSchristos // order to resynchronize those to byte boundaries. The deflate compressed data
80*ec47cc4bSchristos // is terminated with a 10-bit empty fixed block. If any members on the input
81*ec47cc4bSchristos // end with a 10-bit empty fixed block, then that block is excised from the
82*ec47cc4bSchristos // stream. This avoids appending empty fixed blocks for every normalization,
83*ec47cc4bSchristos // and assures that gzip_normalize applied a second time will not change the
84*ec47cc4bSchristos // input. The pad bits after stored block headers and after the final deflate
85*ec47cc4bSchristos // block are all forced to zeros.
gzip_normalize(FILE * in,FILE * out,char ** err)86*ec47cc4bSchristos local int gzip_normalize(FILE *in, FILE *out, char **err) {
87*ec47cc4bSchristos // initialize the inflate engine to process a gzip member
88*ec47cc4bSchristos z_stream strm;
89*ec47cc4bSchristos strm.zalloc = Z_NULL;
90*ec47cc4bSchristos strm.zfree = Z_NULL;
91*ec47cc4bSchristos strm.opaque = Z_NULL;
92*ec47cc4bSchristos strm.avail_in = 0;
93*ec47cc4bSchristos strm.next_in = Z_NULL;
94*ec47cc4bSchristos if (inflateInit2(&strm, 15 + 16) != Z_OK)
95*ec47cc4bSchristos BYE("out of memory");
96*ec47cc4bSchristos
97*ec47cc4bSchristos // State while processing the input gzip stream.
98*ec47cc4bSchristos enum { // BETWEEN -> HEAD -> BLOCK -> TAIL -> BETWEEN -> ...
99*ec47cc4bSchristos BETWEEN, // between gzip members (must end in this state)
100*ec47cc4bSchristos HEAD, // reading a gzip header
101*ec47cc4bSchristos BLOCK, // reading deflate blocks
102*ec47cc4bSchristos TAIL // reading a gzip trailer
103*ec47cc4bSchristos } state = BETWEEN; // current component being processed
104*ec47cc4bSchristos unsigned long crc = 0; // accumulated CRC of uncompressed data
105*ec47cc4bSchristos unsigned long len = 0; // accumulated length of uncompressed data
106*ec47cc4bSchristos unsigned long buf = 0; // deflate stream bit buffer of num bits
107*ec47cc4bSchristos int num = 0; // number of bits in buf (at bottom)
108*ec47cc4bSchristos
109*ec47cc4bSchristos // Write a canonical gzip header (no mod time, file name, comment, extra
110*ec47cc4bSchristos // block, or extra flags, and OS is marked as unknown).
111*ec47cc4bSchristos fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
112*ec47cc4bSchristos
113*ec47cc4bSchristos // Process the gzip stream from in until reaching the end of the input,
114*ec47cc4bSchristos // encountering invalid input, or experiencing an i/o error.
115*ec47cc4bSchristos int more; // true if not at the end of the input
116*ec47cc4bSchristos do {
117*ec47cc4bSchristos // State inside this loop.
118*ec47cc4bSchristos unsigned char *put; // next input buffer location to process
119*ec47cc4bSchristos int prev; // number of bits from previous block in
120*ec47cc4bSchristos // the bit buffer, or -1 if not at the
121*ec47cc4bSchristos // start of a block
122*ec47cc4bSchristos unsigned long long memb; // uncompressed length of member
123*ec47cc4bSchristos size_t tail; // number of trailer bytes read (0..8)
124*ec47cc4bSchristos unsigned long part; // accumulated trailer component
125*ec47cc4bSchristos
126*ec47cc4bSchristos // Get the next chunk of input from in.
127*ec47cc4bSchristos unsigned char dat[CHUNK];
128*ec47cc4bSchristos strm.avail_in = fread(dat, 1, CHUNK, in);
129*ec47cc4bSchristos if (strm.avail_in == 0)
130*ec47cc4bSchristos break;
131*ec47cc4bSchristos more = strm.avail_in == CHUNK;
132*ec47cc4bSchristos strm.next_in = put = dat;
133*ec47cc4bSchristos
134*ec47cc4bSchristos // Run that chunk of input through the inflate engine to exhaustion.
135*ec47cc4bSchristos do {
136*ec47cc4bSchristos // At this point it is assured that strm.avail_in > 0.
137*ec47cc4bSchristos
138*ec47cc4bSchristos // Inflate until the end of a gzip component (header, deflate
139*ec47cc4bSchristos // block, trailer) is reached, or until all of the chunk is
140*ec47cc4bSchristos // consumed. The resulting decompressed data is discarded, though
141*ec47cc4bSchristos // the total size of the decompressed data in each member is
142*ec47cc4bSchristos // tracked, for the calculation of the total CRC.
143*ec47cc4bSchristos do {
144*ec47cc4bSchristos // inflate and handle any errors
145*ec47cc4bSchristos unsigned char scrap[CHUNK];
146*ec47cc4bSchristos strm.avail_out = CHUNK;
147*ec47cc4bSchristos strm.next_out = scrap;
148*ec47cc4bSchristos int ret = inflate(&strm, Z_BLOCK);
149*ec47cc4bSchristos if (ret == Z_MEM_ERROR)
150*ec47cc4bSchristos BYE("out of memory");
151*ec47cc4bSchristos if (ret == Z_DATA_ERROR)
152*ec47cc4bSchristos BYE("input invalid: %s", strm.msg);
153*ec47cc4bSchristos if (ret != Z_OK && ret != Z_BUF_ERROR && ret != Z_STREAM_END)
154*ec47cc4bSchristos BYE("internal error");
155*ec47cc4bSchristos
156*ec47cc4bSchristos // Update the number of uncompressed bytes generated in this
157*ec47cc4bSchristos // member. The actual count (not modulo 2^32) is required to
158*ec47cc4bSchristos // correctly compute the total CRC.
159*ec47cc4bSchristos unsigned got = CHUNK - strm.avail_out;
160*ec47cc4bSchristos memb += got;
161*ec47cc4bSchristos if (memb < got)
162*ec47cc4bSchristos BYE("overflow error");
163*ec47cc4bSchristos
164*ec47cc4bSchristos // Continue to process this chunk until it is consumed, or
165*ec47cc4bSchristos // until the end of a component (header, deflate block, or
166*ec47cc4bSchristos // trailer) is reached.
167*ec47cc4bSchristos } while (strm.avail_out == 0 && (strm.data_type & 0x80) == 0);
168*ec47cc4bSchristos
169*ec47cc4bSchristos // Since strm.avail_in was > 0 for the inflate call, some input was
170*ec47cc4bSchristos // just consumed. It is therefore assured that put < strm.next_in.
171*ec47cc4bSchristos
172*ec47cc4bSchristos // Disposition the consumed component or part of a component.
173*ec47cc4bSchristos switch (state) {
174*ec47cc4bSchristos case BETWEEN:
175*ec47cc4bSchristos state = HEAD;
176*ec47cc4bSchristos // Fall through to HEAD when some or all of the header is
177*ec47cc4bSchristos // processed.
178*ec47cc4bSchristos
179*ec47cc4bSchristos case HEAD:
180*ec47cc4bSchristos // Discard the header.
181*ec47cc4bSchristos if (strm.data_type & 0x80) {
182*ec47cc4bSchristos // End of header reached -- deflate blocks follow.
183*ec47cc4bSchristos put = strm.next_in;
184*ec47cc4bSchristos prev = num;
185*ec47cc4bSchristos memb = 0;
186*ec47cc4bSchristos state = BLOCK;
187*ec47cc4bSchristos }
188*ec47cc4bSchristos break;
189*ec47cc4bSchristos
190*ec47cc4bSchristos case BLOCK:
191*ec47cc4bSchristos // Copy the deflate stream to the output, but with the
192*ec47cc4bSchristos // last-block-bit cleared. Re-synchronize stored block
193*ec47cc4bSchristos // headers to the output byte boundaries. The bytes at
194*ec47cc4bSchristos // put..strm.next_in-1 is the compressed data that has been
195*ec47cc4bSchristos // processed and is ready to be copied to the output.
196*ec47cc4bSchristos
197*ec47cc4bSchristos // At this point, it is assured that new compressed data is
198*ec47cc4bSchristos // available, i.e., put < strm.next_in. If prev is -1, then
199*ec47cc4bSchristos // that compressed data starts in the middle of a deflate
200*ec47cc4bSchristos // block. If prev is not -1, then the bits in the bit
201*ec47cc4bSchristos // buffer, possibly combined with the bits in *put, contain
202*ec47cc4bSchristos // the three-bit header of the new deflate block. In that
203*ec47cc4bSchristos // case, prev is the number of bits from the previous block
204*ec47cc4bSchristos // that remain in the bit buffer. Since num is the number
205*ec47cc4bSchristos // of bits in the bit buffer, we have that num - prev is
206*ec47cc4bSchristos // the number of bits from the new block currently in the
207*ec47cc4bSchristos // bit buffer.
208*ec47cc4bSchristos
209*ec47cc4bSchristos // If strm.data_type & 0xc0 is 0x80, then the last byte of
210*ec47cc4bSchristos // the available compressed data includes the last bits of
211*ec47cc4bSchristos // the end of a deflate block. In that case, that last byte
212*ec47cc4bSchristos // also has strm.data_type & 0x1f bits of the next deflate
213*ec47cc4bSchristos // block, in the range 0..7. If strm.data_type & 0xc0 is
214*ec47cc4bSchristos // 0xc0, then the last byte of the compressed data is the
215*ec47cc4bSchristos // end of the deflate stream, followed by strm.data_type &
216*ec47cc4bSchristos // 0x1f pad bits, also in the range 0..7.
217*ec47cc4bSchristos
218*ec47cc4bSchristos // Set bits to the number of bits not yet consumed from the
219*ec47cc4bSchristos // last byte. If we are at the end of the block, bits is
220*ec47cc4bSchristos // either the number of bits in the last byte belonging to
221*ec47cc4bSchristos // the next block, or the number of pad bits after the
222*ec47cc4bSchristos // final block. In either of those cases, bits is in the
223*ec47cc4bSchristos // range 0..7.
224*ec47cc4bSchristos ; // (required due to C syntax oddity)
225*ec47cc4bSchristos int bits = strm.data_type & 0x1f;
226*ec47cc4bSchristos
227*ec47cc4bSchristos if (prev != -1) {
228*ec47cc4bSchristos // We are at the start of a new block. Clear the last
229*ec47cc4bSchristos // block bit, and check for special cases. If it is a
230*ec47cc4bSchristos // stored block, then emit the header and pad to the
231*ec47cc4bSchristos // next byte boundary. If it is a final, empty fixed
232*ec47cc4bSchristos // block, then excise it.
233*ec47cc4bSchristos
234*ec47cc4bSchristos // Some or all of the three header bits for this block
235*ec47cc4bSchristos // may already be in the bit buffer. Load any remaining
236*ec47cc4bSchristos // header bits into the bit buffer.
237*ec47cc4bSchristos if (num - prev < 3) {
238*ec47cc4bSchristos buf += (unsigned long)*put++ << num;
239*ec47cc4bSchristos num += 8;
240*ec47cc4bSchristos }
241*ec47cc4bSchristos
242*ec47cc4bSchristos // Set last to have a 1 in the position of the last
243*ec47cc4bSchristos // block bit in the bit buffer.
244*ec47cc4bSchristos unsigned long last = (unsigned long)1 << prev;
245*ec47cc4bSchristos
246*ec47cc4bSchristos if (((buf >> prev) & 7) == 3) {
247*ec47cc4bSchristos // This is a final fixed block. Load at least ten
248*ec47cc4bSchristos // bits from this block, including the header, into
249*ec47cc4bSchristos // the bit buffer. We already have at least three,
250*ec47cc4bSchristos // so at most one more byte needs to be loaded.
251*ec47cc4bSchristos if (num - prev < 10) {
252*ec47cc4bSchristos if (put == strm.next_in)
253*ec47cc4bSchristos // Need to go get and process more input.
254*ec47cc4bSchristos // We'll end up back here to finish this.
255*ec47cc4bSchristos break;
256*ec47cc4bSchristos buf += (unsigned long)*put++ << num;
257*ec47cc4bSchristos num += 8;
258*ec47cc4bSchristos }
259*ec47cc4bSchristos if (((buf >> prev) & 0x3ff) == 3) {
260*ec47cc4bSchristos // That final fixed block is empty. Delete it
261*ec47cc4bSchristos // to avoid adding an empty block every time a
262*ec47cc4bSchristos // gzip stream is normalized.
263*ec47cc4bSchristos num = prev;
264*ec47cc4bSchristos buf &= last - 1; // zero the pad bits
265*ec47cc4bSchristos }
266*ec47cc4bSchristos }
267*ec47cc4bSchristos else if (((buf >> prev) & 6) == 0) {
268*ec47cc4bSchristos // This is a stored block. Flush to the next
269*ec47cc4bSchristos // byte boundary after the three-bit header.
270*ec47cc4bSchristos num = (prev + 10) & ~7;
271*ec47cc4bSchristos buf &= last - 1; // zero the pad bits
272*ec47cc4bSchristos }
273*ec47cc4bSchristos
274*ec47cc4bSchristos // Clear the last block bit.
275*ec47cc4bSchristos buf &= ~last;
276*ec47cc4bSchristos
277*ec47cc4bSchristos // Write out complete bytes in the bit buffer.
278*ec47cc4bSchristos while (num >= 8) {
279*ec47cc4bSchristos putc(buf, out);
280*ec47cc4bSchristos buf >>= 8;
281*ec47cc4bSchristos num -= 8;
282*ec47cc4bSchristos }
283*ec47cc4bSchristos
284*ec47cc4bSchristos // If no more bytes left to process, then we have
285*ec47cc4bSchristos // consumed the byte that had bits from the next block.
286*ec47cc4bSchristos if (put == strm.next_in)
287*ec47cc4bSchristos bits = 0;
288*ec47cc4bSchristos }
289*ec47cc4bSchristos
290*ec47cc4bSchristos // We are done handling the deflate block header. Now copy
291*ec47cc4bSchristos // all or almost all of the remaining compressed data that
292*ec47cc4bSchristos // has been processed so far. Don't copy one byte at the
293*ec47cc4bSchristos // end if it contains bits from the next deflate block or
294*ec47cc4bSchristos // pad bits at the end of a deflate block.
295*ec47cc4bSchristos
296*ec47cc4bSchristos // mix is 1 if we are at the end of a deflate block, and if
297*ec47cc4bSchristos // some of the bits in the last byte follow this block. mix
298*ec47cc4bSchristos // is 0 if we are in the middle of a deflate block, if the
299*ec47cc4bSchristos // deflate block ended on a byte boundary, or if all of the
300*ec47cc4bSchristos // compressed data processed so far has been consumed.
301*ec47cc4bSchristos int mix = (strm.data_type & 0x80) && bits;
302*ec47cc4bSchristos
303*ec47cc4bSchristos // Copy all of the processed compressed data to the output,
304*ec47cc4bSchristos // except for the last byte if it contains bits from the
305*ec47cc4bSchristos // next deflate block or pad bits at the end of the deflate
306*ec47cc4bSchristos // stream. Copy the data after shifting in num bits from
307*ec47cc4bSchristos // buf in front of it, leaving num bits from the end of the
308*ec47cc4bSchristos // compressed data in buf when done.
309*ec47cc4bSchristos unsigned char *end = strm.next_in - mix;
310*ec47cc4bSchristos if (put < end) {
311*ec47cc4bSchristos if (num)
312*ec47cc4bSchristos // Insert num bits from buf before the data being
313*ec47cc4bSchristos // copied.
314*ec47cc4bSchristos do {
315*ec47cc4bSchristos buf += (unsigned)(*put++) << num;
316*ec47cc4bSchristos putc(buf, out);
317*ec47cc4bSchristos buf >>= 8;
318*ec47cc4bSchristos } while (put < end);
319*ec47cc4bSchristos else {
320*ec47cc4bSchristos // No shifting needed -- write directly.
321*ec47cc4bSchristos fwrite(put, 1, end - put, out);
322*ec47cc4bSchristos put = end;
323*ec47cc4bSchristos }
324*ec47cc4bSchristos }
325*ec47cc4bSchristos
326*ec47cc4bSchristos // Process the last processed byte if it wasn't written.
327*ec47cc4bSchristos if (mix) {
328*ec47cc4bSchristos // Load the last byte into the bit buffer.
329*ec47cc4bSchristos buf += (unsigned)(*put++) << num;
330*ec47cc4bSchristos num += 8;
331*ec47cc4bSchristos
332*ec47cc4bSchristos if (strm.data_type & 0x40) {
333*ec47cc4bSchristos // We are at the end of the deflate stream and
334*ec47cc4bSchristos // there are bits pad bits. Discard the pad bits
335*ec47cc4bSchristos // and write a byte to the output, if available.
336*ec47cc4bSchristos // Leave the num bits left over in buf to prepend
337*ec47cc4bSchristos // to the next deflate stream.
338*ec47cc4bSchristos num -= bits;
339*ec47cc4bSchristos if (num >= 8) {
340*ec47cc4bSchristos putc(buf, out);
341*ec47cc4bSchristos num -= 8;
342*ec47cc4bSchristos buf >>= 8;
343*ec47cc4bSchristos }
344*ec47cc4bSchristos
345*ec47cc4bSchristos // Force the pad bits in the bit buffer to zeros.
346*ec47cc4bSchristos buf &= ((unsigned long)1 << num) - 1;
347*ec47cc4bSchristos
348*ec47cc4bSchristos // Don't need to set prev here since going to TAIL.
349*ec47cc4bSchristos }
350*ec47cc4bSchristos else
351*ec47cc4bSchristos // At the end of an internal deflate block. Leave
352*ec47cc4bSchristos // the last byte in the bit buffer to examine on
353*ec47cc4bSchristos // the next entry to BLOCK, when more bits from the
354*ec47cc4bSchristos // next block will be available.
355*ec47cc4bSchristos prev = num - bits; // number of bits in buffer
356*ec47cc4bSchristos // from current block
357*ec47cc4bSchristos }
358*ec47cc4bSchristos
359*ec47cc4bSchristos // Don't have a byte left over, so we are in the middle of
360*ec47cc4bSchristos // a deflate block, or the deflate block ended on a byte
361*ec47cc4bSchristos // boundary. Set prev appropriately for the next entry into
362*ec47cc4bSchristos // BLOCK.
363*ec47cc4bSchristos else if (strm.data_type & 0x80)
364*ec47cc4bSchristos // The block ended on a byte boundary, so no header
365*ec47cc4bSchristos // bits are in the bit buffer.
366*ec47cc4bSchristos prev = num;
367*ec47cc4bSchristos else
368*ec47cc4bSchristos // In the middle of a deflate block, so no header here.
369*ec47cc4bSchristos prev = -1;
370*ec47cc4bSchristos
371*ec47cc4bSchristos // Check for the end of the deflate stream.
372*ec47cc4bSchristos if ((strm.data_type & 0xc0) == 0xc0) {
373*ec47cc4bSchristos // That ends the deflate stream on the input side, the
374*ec47cc4bSchristos // pad bits were discarded, and any remaining bits from
375*ec47cc4bSchristos // the last block in the stream are saved in the bit
376*ec47cc4bSchristos // buffer to prepend to the next stream. Process the
377*ec47cc4bSchristos // gzip trailer next.
378*ec47cc4bSchristos tail = 0;
379*ec47cc4bSchristos part = 0;
380*ec47cc4bSchristos state = TAIL;
381*ec47cc4bSchristos }
382*ec47cc4bSchristos break;
383*ec47cc4bSchristos
384*ec47cc4bSchristos case TAIL:
385*ec47cc4bSchristos // Accumulate available trailer bytes to update the total
386*ec47cc4bSchristos // CRC and the total uncompressed length.
387*ec47cc4bSchristos do {
388*ec47cc4bSchristos part = (part >> 8) + ((unsigned long)(*put++) << 24);
389*ec47cc4bSchristos tail++;
390*ec47cc4bSchristos if (tail == 4) {
391*ec47cc4bSchristos // Update the total CRC.
392*ec47cc4bSchristos z_off_t len2 = memb;
393*ec47cc4bSchristos if (len2 < 0 || (unsigned long long)len2 != memb)
394*ec47cc4bSchristos BYE("overflow error");
395*ec47cc4bSchristos crc = crc ? crc32_combine(crc, part, len2) : part;
396*ec47cc4bSchristos part = 0;
397*ec47cc4bSchristos }
398*ec47cc4bSchristos else if (tail == 8) {
399*ec47cc4bSchristos // Update the total uncompressed length. (It's ok
400*ec47cc4bSchristos // if this sum is done modulo 2^32.)
401*ec47cc4bSchristos len += part;
402*ec47cc4bSchristos
403*ec47cc4bSchristos // At the end of a member. Set up to inflate an
404*ec47cc4bSchristos // immediately following gzip member. (If we made
405*ec47cc4bSchristos // it this far, then the trailer was valid.)
406*ec47cc4bSchristos if (inflateReset(&strm) != Z_OK)
407*ec47cc4bSchristos BYE("internal error");
408*ec47cc4bSchristos state = BETWEEN;
409*ec47cc4bSchristos break;
410*ec47cc4bSchristos }
411*ec47cc4bSchristos } while (put < strm.next_in);
412*ec47cc4bSchristos break;
413*ec47cc4bSchristos }
414*ec47cc4bSchristos
415*ec47cc4bSchristos // Process the input buffer until completely consumed.
416*ec47cc4bSchristos } while (strm.avail_in > 0);
417*ec47cc4bSchristos
418*ec47cc4bSchristos // Process input until end of file, invalid input, or i/o error.
419*ec47cc4bSchristos } while (more);
420*ec47cc4bSchristos
421*ec47cc4bSchristos // Done with the inflate engine.
422*ec47cc4bSchristos inflateEnd(&strm);
423*ec47cc4bSchristos
424*ec47cc4bSchristos // Verify the validity of the input.
425*ec47cc4bSchristos if (state != BETWEEN)
426*ec47cc4bSchristos BYE("input invalid: incomplete gzip stream");
427*ec47cc4bSchristos
428*ec47cc4bSchristos // Write the remaining deflate stream bits, followed by a terminating
429*ec47cc4bSchristos // deflate fixed block.
430*ec47cc4bSchristos buf += (unsigned long)3 << num;
431*ec47cc4bSchristos putc(buf, out);
432*ec47cc4bSchristos putc(buf >> 8, out);
433*ec47cc4bSchristos if (num > 6)
434*ec47cc4bSchristos putc(0, out);
435*ec47cc4bSchristos
436*ec47cc4bSchristos // Write the gzip trailer, which is the CRC and the uncompressed length
437*ec47cc4bSchristos // modulo 2^32, both in little-endian order.
438*ec47cc4bSchristos putc(crc, out);
439*ec47cc4bSchristos putc(crc >> 8, out);
440*ec47cc4bSchristos putc(crc >> 16, out);
441*ec47cc4bSchristos putc(crc >> 24, out);
442*ec47cc4bSchristos putc(len, out);
443*ec47cc4bSchristos putc(len >> 8, out);
444*ec47cc4bSchristos putc(len >> 16, out);
445*ec47cc4bSchristos putc(len >> 24, out);
446*ec47cc4bSchristos fflush(out);
447*ec47cc4bSchristos
448*ec47cc4bSchristos // Check for any i/o errors.
449*ec47cc4bSchristos if (ferror(in) || ferror(out))
450*ec47cc4bSchristos BYE("i/o error: %s", strerror(errno));
451*ec47cc4bSchristos
452*ec47cc4bSchristos // All good!
453*ec47cc4bSchristos *err = NULL;
454*ec47cc4bSchristos return 0;
455*ec47cc4bSchristos }
456*ec47cc4bSchristos
// Normalize the gzip stream on stdin, writing the result to stdout. The exit
// status is 0 on success, or 1 on error, in which case a message is written to
// stderr.
int main(void) {
    // Avoid end-of-line conversions on evil operating systems.
    SET_BINARY_MODE(stdin);
    SET_BINARY_MODE(stdout);

    // Normalize from stdin to stdout, returning 1 on error, 0 if ok.
    char *err = NULL;
    int ret = gzip_normalize(stdin, stdout, &err);
    if (ret)
        // err is NULL if the error message itself could not be allocated, and
        // a null pointer must not be handed to %s.
        fprintf(stderr, "gznorm error: %s\n",
                err != NULL ? err : "unknown (could not allocate message)");
    free(err);
    return ret;
}
471