xref: /openbsd-src/usr.bin/compress/gzopen.c (revision 91f110e064cd7c194e59e019b83bb7496c1c84d4)
1 /*	$OpenBSD: gzopen.c,v 1.27 2011/09/22 10:41:04 deraadt Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 /* this is partially derived from the zlib's gzio.c file, so the notice: */
30 /*
31   zlib.h -- interface of the 'zlib' general purpose compression library
32   version 1.0.4, Jul 24th, 1996.
33 
34   Copyright (C) 1995-1996 Jean-loup Gailly and Mark Adler
35 
36   This software is provided 'as-is', without any express or implied
37   warranty.  In no event will the authors be held liable for any damages
38   arising from the use of this software.
39 
40   Permission is granted to anyone to use this software for any purpose,
41   including commercial applications, and to alter it and redistribute it
42   freely, subject to the following restrictions:
43 
44   1. The origin of this software must not be misrepresented; you must not
45      claim that you wrote the original software. If you use this software
46      in a product, an acknowledgment in the product documentation would be
47      appreciated but is not required.
48   2. Altered source versions must be plainly marked as such, and must not be
49      misrepresented as being the original software.
50   3. This notice may not be removed or altered from any source distribution.
51 
52   Jean-loup Gailly        Mark Adler
53   gzip@prep.ai.mit.edu    madler@alumni.caltech.edu
54 
55 
56   The data format used by the zlib library is described by RFCs (Request for
57   Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt
58   (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
59 */
60 
61 #include <sys/param.h>
62 #include <sys/stat.h>
63 #include <sys/uio.h>
64 #include <stdio.h>
65 #include <stdlib.h>
66 #include <string.h>
67 #include <errno.h>
68 #include <unistd.h>
69 #include <zlib.h>
70 #include "compress.h"
71 
/*
 * Bits of the flag byte found in a gzip member header.
 */
#define ASCII_FLAG   (1 << 0)	/* file is probably ascii text */
#define HEAD_CRC     (1 << 1)	/* a header CRC is present */
#define EXTRA_FIELD  (1 << 2)	/* an extra field is present */
#define ORIG_NAME    (1 << 3)	/* the original file name is present */
#define COMMENT      (1 << 4)	/* a file comment is present */
#define RESERVED     (7 << 5)	/* bits 5..7 are reserved */

#define DEF_MEM_LEVEL 8		/* memLevel argument given to deflateInit2() */
#define OS_CODE 3		/* OS byte written into the header: unix */
82 
83 typedef
84 struct gz_stream {
85 	int	z_fd;		/* .gz file */
86 	z_stream z_stream;	/* libz stream */
87 	int	z_eof;		/* set if end of input file */
88 	u_char	z_buf[Z_BUFSIZE]; /* i/o buffer */
89 	u_int32_t z_time;	/* timestamp (mtime) */
90 	u_int32_t z_hlen;	/* length of the gz header */
91 	u_int32_t z_crc;	/* crc32 of uncompressed data */
92 	char	z_mode;		/* 'w' or 'r' */
93 
94 } gz_stream;
95 
96 static const u_char gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
97 
98 static int put_int32(gz_stream *, u_int32_t);
99 static u_int32_t get_int32(gz_stream *);
100 static int get_header(gz_stream *, char *, int);
101 static int put_header(gz_stream *, char *, u_int32_t, int);
102 static int get_byte(gz_stream *);
103 
104 void *
105 gz_open(int fd, const char *mode, char *name, int bits,
106     u_int32_t mtime, int gotmagic)
107 {
108 	gz_stream *s;
109 
110 	if (fd < 0 || !mode)
111 		return NULL;
112 
113 	if ((mode[0] != 'r' && mode[0] != 'w') || mode[1] != '\0' ||
114 	    bits < 0 || bits > Z_BEST_COMPRESSION) {
115 		errno = EINVAL;
116 		return NULL;
117 	}
118 	if ((s = (gz_stream *)calloc(1, sizeof(gz_stream))) == NULL)
119 		return NULL;
120 
121 	s->z_stream.zalloc = (alloc_func)0;
122 	s->z_stream.zfree = (free_func)0;
123 	s->z_stream.opaque = (voidpf)0;
124 	s->z_stream.next_in = Z_NULL;
125 	s->z_stream.next_out = Z_NULL;
126 	s->z_stream.avail_in = s->z_stream.avail_out = 0;
127 	s->z_fd = 0;
128 	s->z_eof = 0;
129 	s->z_time = 0;
130 	s->z_hlen = 0;
131 	s->z_crc = crc32(0L, Z_NULL, 0);
132 	s->z_mode = mode[0];
133 
134 	if (s->z_mode == 'w') {
135 #ifndef SMALL
136 		/* windowBits is passed < 0 to suppress zlib header */
137 		if (deflateInit2(&(s->z_stream), bits, Z_DEFLATED,
138 				 -MAX_WBITS, DEF_MEM_LEVEL, 0) != Z_OK) {
139 			free (s);
140 			return NULL;
141 		}
142 		s->z_stream.next_out = s->z_buf;
143 #else
144 		free(s);
145 		return (NULL);
146 #endif
147 	} else {
148 		if (inflateInit2(&(s->z_stream), -MAX_WBITS) != Z_OK) {
149 			free (s);
150 			return NULL;
151 		}
152 		s->z_stream.next_in = s->z_buf;
153 	}
154 	s->z_stream.avail_out = Z_BUFSIZE;
155 
156 	errno = 0;
157 	s->z_fd = fd;
158 
159 	if (s->z_mode == 'w') {
160 		/* write the .gz header */
161 		if (put_header(s, name, mtime, bits) != 0) {
162 			gz_close(s, NULL, NULL, NULL);
163 			s = NULL;
164 		}
165 	} else {
166 		/* read the .gz header */
167 		if (get_header(s, name, gotmagic) != 0) {
168 			gz_close(s, NULL, NULL, NULL);
169 			s = NULL;
170 		}
171 	}
172 
173 	return s;
174 }
175 
176 int
177 gz_close(void *cookie, struct z_info *info, const char *name, struct stat *sb)
178 {
179 	gz_stream *s = (gz_stream*)cookie;
180 	int err = 0;
181 
182 	if (s == NULL)
183 		return -1;
184 
185 #ifndef SMALL
186 	if (s->z_mode == 'w' && (err = gz_flush (s, Z_FINISH)) == Z_OK) {
187 		if ((err = put_int32 (s, s->z_crc)) == Z_OK) {
188 			s->z_hlen += sizeof(int32_t);
189 			if ((err = put_int32 (s, s->z_stream.total_in)) == Z_OK)
190 				s->z_hlen += sizeof(int32_t);
191 		}
192 	}
193 #endif
194 	if (!err && s->z_stream.state != NULL) {
195 		if (s->z_mode == 'w')
196 #ifndef SMALL
197 			err = deflateEnd(&s->z_stream);
198 #else
199 			err = -1;
200 #endif
201 		else if (s->z_mode == 'r')
202 			err = inflateEnd(&s->z_stream);
203 	}
204 
205 	if (info != NULL) {
206 		info->mtime = s->z_time;
207 		info->crc = s->z_crc;
208 		info->hlen = s->z_hlen;
209 		info->total_in = (off_t)s->z_stream.total_in;
210 		info->total_out = (off_t)s->z_stream.total_out;
211 	}
212 
213 	setfile(name, s->z_fd, sb);
214 	if (!err)
215 		err = close(s->z_fd);
216 	else
217 		(void)close(s->z_fd);
218 
219 	free(s);
220 
221 	return err;
222 }
223 
224 #ifndef SMALL
225 int
226 gz_flush(void *cookie, int flush)
227 {
228 	gz_stream *s = (gz_stream*)cookie;
229 	size_t len;
230 	int done = 0;
231 	int err;
232 
233 	if (s == NULL || s->z_mode != 'w') {
234 		errno = EBADF;
235 		return Z_ERRNO;
236 	}
237 
238 	s->z_stream.avail_in = 0; /* should be zero already anyway */
239 
240 	for (;;) {
241 		len = Z_BUFSIZE - s->z_stream.avail_out;
242 
243 		if (len != 0) {
244 			if (write(s->z_fd, s->z_buf, len) != len)
245 				return Z_ERRNO;
246 			s->z_stream.next_out = s->z_buf;
247 			s->z_stream.avail_out = Z_BUFSIZE;
248 		}
249 		if (done)
250 			break;
251 		if ((err = deflate(&(s->z_stream), flush)) != Z_OK &&
252 		    err != Z_STREAM_END)
253 			return err;
254 
255 		/* deflate has finished flushing only when it hasn't
256 		 * used up all the available space in the output buffer
257 		 */
258 		done = (s->z_stream.avail_out != 0 || err == Z_STREAM_END);
259 	}
260 	return 0;
261 }
262 #endif
263 
264 static int
265 put_int32(gz_stream *s, u_int32_t x)
266 {
267 	u_int32_t y = htole32(x);
268 
269 	if (write(s->z_fd, &y, sizeof(y)) != sizeof(y))
270 		return Z_ERRNO;
271 	return 0;
272 }
273 
274 static int
275 get_byte(gz_stream *s)
276 {
277 	if (s->z_eof)
278 		return EOF;
279 
280 	if (s->z_stream.avail_in == 0) {
281 		errno = 0;
282 		s->z_stream.avail_in = read(s->z_fd, s->z_buf, Z_BUFSIZE);
283 		if ((int)s->z_stream.avail_in <= 0) {
284 			s->z_eof = 1;
285 			return EOF;
286 		}
287 		s->z_stream.next_in = s->z_buf;
288 	}
289 	s->z_stream.avail_in--;
290 	return *s->z_stream.next_in++;
291 }
292 
293 static u_int32_t
294 get_int32(gz_stream *s)
295 {
296 	u_int32_t x;
297 
298 	x  = ((u_int32_t)(get_byte(s) & 0xff));
299 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<8;
300 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<16;
301 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<24;
302 	return x;
303 }
304 
305 static int
306 get_header(gz_stream *s, char *name, int gotmagic)
307 {
308 	int method; /* method byte */
309 	int flags;  /* flags byte */
310 	char *ep;
311 	uInt len;
312 	int c;
313 
314 	/* Check the gzip magic header */
315 	if (!gotmagic) {
316 		for (len = 0; len < 2; len++) {
317 			c = get_byte(s);
318 			if (c != gz_magic[len]) {
319 				errno = EFTYPE;
320 				return -1;
321 			}
322 		}
323 	}
324 
325 	method = get_byte(s);
326 	flags = get_byte(s);
327 	if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
328 		errno = EFTYPE;
329 		return -1;
330 	}
331 
332 	/* Stash timestamp (mtime) */
333 	s->z_time = get_int32(s);
334 
335 	/* Discard xflags and OS code */
336 	(void)get_byte(s);
337 	(void)get_byte(s);
338 
339 	s->z_hlen = 10; /* magic, method, flags, time, xflags, OS code */
340 	if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
341 		len  =  (uInt)get_byte(s);
342 		len += ((uInt)get_byte(s))<<8;
343 		s->z_hlen += 2;
344 		/* len is garbage if EOF but the loop below will quit anyway */
345 		while (len-- != 0 && get_byte(s) != EOF)
346 			s->z_hlen++;
347 	}
348 
349 	if ((flags & ORIG_NAME) != 0) { /* read/save the original file name */
350 		if ((ep = name) != NULL)
351 			ep += MAXPATHLEN - 1;
352 		while ((c = get_byte(s)) != EOF) {
353 			s->z_hlen++;
354 			if (c == '\0')
355 				break;
356 			if (name < ep)
357 				*name++ = c;
358 		}
359 		if (name != NULL)
360 			*name = '\0';
361 	}
362 
363 	if ((flags & COMMENT) != 0) {   /* skip the .gz file comment */
364 		while ((c = get_byte(s)) != EOF) {
365 			s->z_hlen++;
366 			if (c == '\0')
367 				break;
368 		}
369 	}
370 
371 	if ((flags & HEAD_CRC) != 0) {  /* skip the header crc */
372 		(void)get_byte(s);
373 		(void)get_byte(s);
374 		s->z_hlen += 2;
375 	}
376 
377 	if (s->z_eof) {
378 		errno = EFTYPE;
379 		return -1;
380 	}
381 
382 	return 0;
383 }
384 
385 static int
386 put_header(gz_stream *s, char *name, u_int32_t mtime, int bits)
387 {
388 	struct iovec iov[2];
389 	u_char buf[10];
390 
391 	buf[0] = gz_magic[0];
392 	buf[1] = gz_magic[1];
393 	buf[2] = Z_DEFLATED;
394 	buf[3] = name ? ORIG_NAME : 0;
395 	buf[4] = mtime & 0xff;
396 	buf[5] = (mtime >> 8) & 0xff;
397 	buf[6] = (mtime >> 16) & 0xff;
398 	buf[7] = (mtime >> 24) & 0xff;
399 	buf[8] = bits == 1 ? 4 : bits == 9 ? 2 : 0;	/* xflags */
400 	buf[9] = OS_CODE;
401 	iov[0].iov_base = buf;
402 	iov[0].iov_len = sizeof(buf);
403 	s->z_hlen = sizeof(buf);
404 
405 	if (name != NULL) {
406 		iov[1].iov_base = name;
407 		iov[1].iov_len = strlen(name) + 1;
408 		s->z_hlen += iov[1].iov_len;
409 	}
410 	if (writev(s->z_fd, iov, name ? 2 : 1) == -1)
411 		return (-1);
412 	return (0);
413 }
414 
415 int
416 gz_read(void *cookie, char *buf, int len)
417 {
418 	gz_stream *s = (gz_stream*)cookie;
419 	u_char *start = buf; /* starting point for crc computation */
420 	int error = Z_OK;
421 
422 	s->z_stream.next_out = buf;
423 	s->z_stream.avail_out = len;
424 
425 	while (error == Z_OK && !s->z_eof && s->z_stream.avail_out != 0) {
426 
427 		if (s->z_stream.avail_in == 0) {
428 
429 			errno = 0;
430 			s->z_stream.avail_in = read(s->z_fd, s->z_buf,
431 			    Z_BUFSIZE);
432 			if ((int)s->z_stream.avail_in <= 0)
433 				s->z_eof = 1;
434 			s->z_stream.next_in = s->z_buf;
435 		}
436 
437 		error = inflate(&(s->z_stream), Z_NO_FLUSH);
438 
439 		if (error == Z_DATA_ERROR) {
440 			errno = EINVAL;
441 			return -1;
442 		}
443 		if (error == Z_BUF_ERROR) {
444 			errno = EIO;
445 			return -1;
446 		}
447 		if (error == Z_STREAM_END) {
448 			/* Check CRC and original size */
449 			s->z_crc = crc32(s->z_crc, start,
450 			    (uInt)(s->z_stream.next_out - start));
451 			start = s->z_stream.next_out;
452 
453 			if (get_int32(s) != s->z_crc) {
454 				errno = EINVAL;
455 				return -1;
456 			}
457 			if (get_int32(s) != (u_int32_t)s->z_stream.total_out) {
458 				errno = EIO;
459 				return -1;
460 			}
461 			s->z_hlen += 2 * sizeof(int32_t);
462 			/* Check for the existence of an appended file. */
463 			if (get_header(s, NULL, 0) != 0) {
464 				s->z_eof = 1;
465 				break;
466 			}
467 			inflateReset(&(s->z_stream));
468 			s->z_crc = crc32(0L, Z_NULL, 0);
469 			error = Z_OK;
470 		}
471 	}
472 	s->z_crc = crc32(s->z_crc, start,
473 	    (uInt)(s->z_stream.next_out - start));
474 	len -= s->z_stream.avail_out;
475 
476 	return (len);
477 }
478 
479 int
480 gz_write(void *cookie, const char *buf, int len)
481 {
482 #ifndef SMALL
483 	gz_stream *s = (gz_stream*)cookie;
484 
485 	s->z_stream.next_in = (char *)buf;
486 	s->z_stream.avail_in = len;
487 
488 	while (s->z_stream.avail_in != 0) {
489 		if (s->z_stream.avail_out == 0) {
490 			if (write(s->z_fd, s->z_buf, Z_BUFSIZE) != Z_BUFSIZE)
491 				break;
492 			s->z_stream.next_out = s->z_buf;
493 			s->z_stream.avail_out = Z_BUFSIZE;
494 		}
495 		if (deflate(&(s->z_stream), Z_NO_FLUSH) != Z_OK)
496 			break;
497 	}
498 	s->z_crc = crc32(s->z_crc, buf, len);
499 
500 	return (int)(len - s->z_stream.avail_in);
501 #endif
502 }
503