xref: /openbsd-src/usr.bin/compress/gzopen.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*	$OpenBSD: gzopen.c,v 1.26 2009/11/11 18:04:35 deraadt Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 /* this is partially derived from the zlib's gzio.c file, so the notice: */
30 /*
31   zlib.h -- interface of the 'zlib' general purpose compression library
32   version 1.0.4, Jul 24th, 1996.
33 
34   Copyright (C) 1995-1996 Jean-loup Gailly and Mark Adler
35 
36   This software is provided 'as-is', without any express or implied
37   warranty.  In no event will the authors be held liable for any damages
38   arising from the use of this software.
39 
40   Permission is granted to anyone to use this software for any purpose,
41   including commercial applications, and to alter it and redistribute it
42   freely, subject to the following restrictions:
43 
44   1. The origin of this software must not be misrepresented; you must not
45      claim that you wrote the original software. If you use this software
46      in a product, an acknowledgment in the product documentation would be
47      appreciated but is not required.
48   2. Altered source versions must be plainly marked as such, and must not be
49      misrepresented as being the original software.
50   3. This notice may not be removed or altered from any source distribution.
51 
52   Jean-loup Gailly        Mark Adler
53   gzip@prep.ai.mit.edu    madler@alumni.caltech.edu
54 
55 
56   The data format used by the zlib library is described by RFCs (Request for
57   Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt
58   (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
59 */
60 
61 #ifndef SMALL
62 const char gz_rcsid[] =
63     "$OpenBSD: gzopen.c,v 1.26 2009/11/11 18:04:35 deraadt Exp $";
64 #endif
65 
66 #include <sys/param.h>
67 #include <sys/stat.h>
68 #include <sys/uio.h>
69 #include <stdio.h>
70 #include <stdlib.h>
71 #include <string.h>
72 #include <errno.h>
73 #include <unistd.h>
74 #include <zlib.h>
75 #include "compress.h"
76 
/* Bits of the flags byte in a gzip member header */
#define ASCII_FLAG   0x01 /* bit 0: contents are probably ASCII text */
#define HEAD_CRC     0x02 /* bit 1: a header CRC is present */
#define EXTRA_FIELD  0x04 /* bit 2: an extra field is present */
#define ORIG_NAME    0x08 /* bit 3: the original file name is present */
#define COMMENT      0x10 /* bit 4: a file comment is present */
#define RESERVED     0xE0 /* bits 5..7: reserved, rejected by get_header() */

#define DEF_MEM_LEVEL 8 /* memLevel argument handed to deflateInit2() */
#define OS_CODE 0x03 /* OS byte stored by put_header(): unix */
87 
88 typedef
89 struct gz_stream {
90 	int	z_fd;		/* .gz file */
91 	z_stream z_stream;	/* libz stream */
92 	int	z_eof;		/* set if end of input file */
93 	u_char	z_buf[Z_BUFSIZE]; /* i/o buffer */
94 	u_int32_t z_time;	/* timestamp (mtime) */
95 	u_int32_t z_hlen;	/* length of the gz header */
96 	u_int32_t z_crc;	/* crc32 of uncompressed data */
97 	char	z_mode;		/* 'w' or 'r' */
98 
99 } gz_stream;
100 
101 static const u_char gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
102 
103 static int put_int32(gz_stream *, u_int32_t);
104 static u_int32_t get_int32(gz_stream *);
105 static int get_header(gz_stream *, char *, int);
106 static int put_header(gz_stream *, char *, u_int32_t, int);
107 static int get_byte(gz_stream *);
108 
109 void *
110 gz_open(int fd, const char *mode, char *name, int bits,
111     u_int32_t mtime, int gotmagic)
112 {
113 	gz_stream *s;
114 
115 	if (fd < 0 || !mode)
116 		return NULL;
117 
118 	if ((mode[0] != 'r' && mode[0] != 'w') || mode[1] != '\0' ||
119 	    bits < 0 || bits > Z_BEST_COMPRESSION) {
120 		errno = EINVAL;
121 		return NULL;
122 	}
123 	if ((s = (gz_stream *)calloc(1, sizeof(gz_stream))) == NULL)
124 		return NULL;
125 
126 	s->z_stream.zalloc = (alloc_func)0;
127 	s->z_stream.zfree = (free_func)0;
128 	s->z_stream.opaque = (voidpf)0;
129 	s->z_stream.next_in = Z_NULL;
130 	s->z_stream.next_out = Z_NULL;
131 	s->z_stream.avail_in = s->z_stream.avail_out = 0;
132 	s->z_fd = 0;
133 	s->z_eof = 0;
134 	s->z_time = 0;
135 	s->z_hlen = 0;
136 	s->z_crc = crc32(0L, Z_NULL, 0);
137 	s->z_mode = mode[0];
138 
139 	if (s->z_mode == 'w') {
140 #ifndef SMALL
141 		/* windowBits is passed < 0 to suppress zlib header */
142 		if (deflateInit2(&(s->z_stream), bits, Z_DEFLATED,
143 				 -MAX_WBITS, DEF_MEM_LEVEL, 0) != Z_OK) {
144 			free (s);
145 			return NULL;
146 		}
147 		s->z_stream.next_out = s->z_buf;
148 #else
149 		free(s);
150 		return (NULL);
151 #endif
152 	} else {
153 		if (inflateInit2(&(s->z_stream), -MAX_WBITS) != Z_OK) {
154 			free (s);
155 			return NULL;
156 		}
157 		s->z_stream.next_in = s->z_buf;
158 	}
159 	s->z_stream.avail_out = Z_BUFSIZE;
160 
161 	errno = 0;
162 	s->z_fd = fd;
163 
164 	if (s->z_mode == 'w') {
165 		/* write the .gz header */
166 		if (put_header(s, name, mtime, bits) != 0) {
167 			gz_close(s, NULL, NULL, NULL);
168 			s = NULL;
169 		}
170 	} else {
171 		/* read the .gz header */
172 		if (get_header(s, name, gotmagic) != 0) {
173 			gz_close(s, NULL, NULL, NULL);
174 			s = NULL;
175 		}
176 	}
177 
178 	return s;
179 }
180 
181 int
182 gz_close(void *cookie, struct z_info *info, const char *name, struct stat *sb)
183 {
184 	gz_stream *s = (gz_stream*)cookie;
185 	int err = 0;
186 
187 	if (s == NULL)
188 		return -1;
189 
190 #ifndef SMALL
191 	if (s->z_mode == 'w' && (err = gz_flush (s, Z_FINISH)) == Z_OK) {
192 		if ((err = put_int32 (s, s->z_crc)) == Z_OK) {
193 			s->z_hlen += sizeof(int32_t);
194 			if ((err = put_int32 (s, s->z_stream.total_in)) == Z_OK)
195 				s->z_hlen += sizeof(int32_t);
196 		}
197 	}
198 #endif
199 	if (!err && s->z_stream.state != NULL) {
200 		if (s->z_mode == 'w')
201 #ifndef SMALL
202 			err = deflateEnd(&s->z_stream);
203 #else
204 			err = -1;
205 #endif
206 		else if (s->z_mode == 'r')
207 			err = inflateEnd(&s->z_stream);
208 	}
209 
210 	if (info != NULL) {
211 		info->mtime = s->z_time;
212 		info->crc = s->z_crc;
213 		info->hlen = s->z_hlen;
214 		info->total_in = (off_t)s->z_stream.total_in;
215 		info->total_out = (off_t)s->z_stream.total_out;
216 	}
217 
218 	setfile(name, s->z_fd, sb);
219 	if (!err)
220 		err = close(s->z_fd);
221 	else
222 		(void)close(s->z_fd);
223 
224 	free(s);
225 
226 	return err;
227 }
228 
229 #ifndef SMALL
230 int
231 gz_flush(void *cookie, int flush)
232 {
233 	gz_stream *s = (gz_stream*)cookie;
234 	size_t len;
235 	int done = 0;
236 	int err;
237 
238 	if (s == NULL || s->z_mode != 'w') {
239 		errno = EBADF;
240 		return Z_ERRNO;
241 	}
242 
243 	s->z_stream.avail_in = 0; /* should be zero already anyway */
244 
245 	for (;;) {
246 		len = Z_BUFSIZE - s->z_stream.avail_out;
247 
248 		if (len != 0) {
249 			if (write(s->z_fd, s->z_buf, len) != len)
250 				return Z_ERRNO;
251 			s->z_stream.next_out = s->z_buf;
252 			s->z_stream.avail_out = Z_BUFSIZE;
253 		}
254 		if (done)
255 			break;
256 		if ((err = deflate(&(s->z_stream), flush)) != Z_OK &&
257 		    err != Z_STREAM_END)
258 			return err;
259 
260 		/* deflate has finished flushing only when it hasn't
261 		 * used up all the available space in the output buffer
262 		 */
263 		done = (s->z_stream.avail_out != 0 || err == Z_STREAM_END);
264 	}
265 	return 0;
266 }
267 #endif
268 
269 static int
270 put_int32(gz_stream *s, u_int32_t x)
271 {
272 	u_int32_t y = htole32(x);
273 
274 	if (write(s->z_fd, &y, sizeof(y)) != sizeof(y))
275 		return Z_ERRNO;
276 	return 0;
277 }
278 
279 static int
280 get_byte(gz_stream *s)
281 {
282 	if (s->z_eof)
283 		return EOF;
284 
285 	if (s->z_stream.avail_in == 0) {
286 		errno = 0;
287 		s->z_stream.avail_in = read(s->z_fd, s->z_buf, Z_BUFSIZE);
288 		if ((int)s->z_stream.avail_in <= 0) {
289 			s->z_eof = 1;
290 			return EOF;
291 		}
292 		s->z_stream.next_in = s->z_buf;
293 	}
294 	s->z_stream.avail_in--;
295 	return *s->z_stream.next_in++;
296 }
297 
298 static u_int32_t
299 get_int32(gz_stream *s)
300 {
301 	u_int32_t x;
302 
303 	x  = ((u_int32_t)(get_byte(s) & 0xff));
304 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<8;
305 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<16;
306 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<24;
307 	return x;
308 }
309 
310 static int
311 get_header(gz_stream *s, char *name, int gotmagic)
312 {
313 	int method; /* method byte */
314 	int flags;  /* flags byte */
315 	char *ep;
316 	uInt len;
317 	int c;
318 
319 	/* Check the gzip magic header */
320 	if (!gotmagic) {
321 		for (len = 0; len < 2; len++) {
322 			c = get_byte(s);
323 			if (c != gz_magic[len]) {
324 				errno = EFTYPE;
325 				return -1;
326 			}
327 		}
328 	}
329 
330 	method = get_byte(s);
331 	flags = get_byte(s);
332 	if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
333 		errno = EFTYPE;
334 		return -1;
335 	}
336 
337 	/* Stash timestamp (mtime) */
338 	s->z_time = get_int32(s);
339 
340 	/* Discard xflags and OS code */
341 	(void)get_byte(s);
342 	(void)get_byte(s);
343 
344 	s->z_hlen = 10; /* magic, method, flags, time, xflags, OS code */
345 	if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
346 		len  =  (uInt)get_byte(s);
347 		len += ((uInt)get_byte(s))<<8;
348 		s->z_hlen += 2;
349 		/* len is garbage if EOF but the loop below will quit anyway */
350 		while (len-- != 0 && get_byte(s) != EOF)
351 			s->z_hlen++;
352 	}
353 
354 	if ((flags & ORIG_NAME) != 0) { /* read/save the original file name */
355 		if ((ep = name) != NULL)
356 			ep += MAXPATHLEN - 1;
357 		while ((c = get_byte(s)) != EOF) {
358 			s->z_hlen++;
359 			if (c == '\0')
360 				break;
361 			if (name < ep)
362 				*name++ = c;
363 		}
364 		if (name != NULL)
365 			*name = '\0';
366 	}
367 
368 	if ((flags & COMMENT) != 0) {   /* skip the .gz file comment */
369 		while ((c = get_byte(s)) != EOF) {
370 			s->z_hlen++;
371 			if (c == '\0')
372 				break;
373 		}
374 	}
375 
376 	if ((flags & HEAD_CRC) != 0) {  /* skip the header crc */
377 		(void)get_byte(s);
378 		(void)get_byte(s);
379 		s->z_hlen += 2;
380 	}
381 
382 	if (s->z_eof) {
383 		errno = EFTYPE;
384 		return -1;
385 	}
386 
387 	return 0;
388 }
389 
390 static int
391 put_header(gz_stream *s, char *name, u_int32_t mtime, int bits)
392 {
393 	struct iovec iov[2];
394 	u_char buf[10];
395 
396 	buf[0] = gz_magic[0];
397 	buf[1] = gz_magic[1];
398 	buf[2] = Z_DEFLATED;
399 	buf[3] = name ? ORIG_NAME : 0;
400 	buf[4] = mtime & 0xff;
401 	buf[5] = (mtime >> 8) & 0xff;
402 	buf[6] = (mtime >> 16) & 0xff;
403 	buf[7] = (mtime >> 24) & 0xff;
404 	buf[8] = bits == 1 ? 4 : bits == 9 ? 2 : 0;	/* xflags */
405 	buf[9] = OS_CODE;
406 	iov[0].iov_base = buf;
407 	iov[0].iov_len = sizeof(buf);
408 	s->z_hlen = sizeof(buf);
409 
410 	if (name != NULL) {
411 		iov[1].iov_base = name;
412 		iov[1].iov_len = strlen(name) + 1;
413 		s->z_hlen += iov[1].iov_len;
414 	}
415 	if (writev(s->z_fd, iov, name ? 2 : 1) == -1)
416 		return (-1);
417 	return (0);
418 }
419 
420 int
421 gz_read(void *cookie, char *buf, int len)
422 {
423 	gz_stream *s = (gz_stream*)cookie;
424 	u_char *start = buf; /* starting point for crc computation */
425 	int error = Z_OK;
426 
427 	s->z_stream.next_out = buf;
428 	s->z_stream.avail_out = len;
429 
430 	while (error == Z_OK && !s->z_eof && s->z_stream.avail_out != 0) {
431 
432 		if (s->z_stream.avail_in == 0) {
433 
434 			errno = 0;
435 			s->z_stream.avail_in = read(s->z_fd, s->z_buf,
436 			    Z_BUFSIZE);
437 			if ((int)s->z_stream.avail_in <= 0)
438 				s->z_eof = 1;
439 			s->z_stream.next_in = s->z_buf;
440 		}
441 
442 		error = inflate(&(s->z_stream), Z_NO_FLUSH);
443 
444 		if (error == Z_DATA_ERROR) {
445 			errno = EINVAL;
446 			return -1;
447 		}
448 		if (error == Z_BUF_ERROR) {
449 			errno = EIO;
450 			return -1;
451 		}
452 		if (error == Z_STREAM_END) {
453 			/* Check CRC and original size */
454 			s->z_crc = crc32(s->z_crc, start,
455 			    (uInt)(s->z_stream.next_out - start));
456 			start = s->z_stream.next_out;
457 
458 			if (get_int32(s) != s->z_crc) {
459 				errno = EINVAL;
460 				return -1;
461 			}
462 			if (get_int32(s) != (u_int32_t)s->z_stream.total_out) {
463 				errno = EIO;
464 				return -1;
465 			}
466 			s->z_hlen += 2 * sizeof(int32_t);
467 			/* Check for the existence of an appended file. */
468 			if (get_header(s, NULL, 0) != 0) {
469 				s->z_eof = 1;
470 				break;
471 			}
472 			inflateReset(&(s->z_stream));
473 			s->z_crc = crc32(0L, Z_NULL, 0);
474 			error = Z_OK;
475 		}
476 	}
477 	s->z_crc = crc32(s->z_crc, start,
478 	    (uInt)(s->z_stream.next_out - start));
479 	len -= s->z_stream.avail_out;
480 
481 	return (len);
482 }
483 
484 int
485 gz_write(void *cookie, const char *buf, int len)
486 {
487 #ifndef SMALL
488 	gz_stream *s = (gz_stream*)cookie;
489 
490 	s->z_stream.next_in = (char *)buf;
491 	s->z_stream.avail_in = len;
492 
493 	while (s->z_stream.avail_in != 0) {
494 		if (s->z_stream.avail_out == 0) {
495 			if (write(s->z_fd, s->z_buf, Z_BUFSIZE) != Z_BUFSIZE)
496 				break;
497 			s->z_stream.next_out = s->z_buf;
498 			s->z_stream.avail_out = Z_BUFSIZE;
499 		}
500 		if (deflate(&(s->z_stream), Z_NO_FLUSH) != Z_OK)
501 			break;
502 	}
503 	s->z_crc = crc32(s->z_crc, buf, len);
504 
505 	return (int)(len - s->z_stream.avail_in);
506 #endif
507 }
508