xref: /openbsd-src/usr.bin/compress/gzopen.c (revision 2b0358df1d88d06ef4139321dd05bd5e05d91eaf)
1 /*	$OpenBSD: gzopen.c,v 1.25 2008/08/20 09:22:02 mpf Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 /* this is partially derived from the zlib's gzio.c file, so the notice: */
30 /*
31   zlib.h -- interface of the 'zlib' general purpose compression library
32   version 1.0.4, Jul 24th, 1996.
33 
34   Copyright (C) 1995-1996 Jean-loup Gailly and Mark Adler
35 
36   This software is provided 'as-is', without any express or implied
37   warranty.  In no event will the authors be held liable for any damages
38   arising from the use of this software.
39 
40   Permission is granted to anyone to use this software for any purpose,
41   including commercial applications, and to alter it and redistribute it
42   freely, subject to the following restrictions:
43 
44   1. The origin of this software must not be misrepresented; you must not
45      claim that you wrote the original software. If you use this software
46      in a product, an acknowledgment in the product documentation would be
47      appreciated but is not required.
48   2. Altered source versions must be plainly marked as such, and must not be
49      misrepresented as being the original software.
50   3. This notice may not be removed or altered from any source distribution.
51 
52   Jean-loup Gailly        Mark Adler
53   gzip@prep.ai.mit.edu    madler@alumni.caltech.edu
54 
55 
56   The data format used by the zlib library is described by RFCs (Request for
57   Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt
58   (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
59 */
60 
61 #ifndef SMALL
62 const char gz_rcsid[] =
63     "$OpenBSD: gzopen.c,v 1.25 2008/08/20 09:22:02 mpf Exp $";
64 #endif
65 
66 #include <sys/param.h>
67 #include <sys/stat.h>
68 #include <sys/uio.h>
69 #include <stdio.h>
70 #include <stdlib.h>
71 #include <string.h>
72 #include <errno.h>
73 #include <unistd.h>
74 #include <zlib.h>
75 #include "compress.h"
76 
/*
 * Bits of the FLG byte in a gzip member header.
 */
#define ASCII_FLAG   (1 << 0) /* file is probably ascii text */
#define HEAD_CRC     (1 << 1) /* a header CRC follows the header */
#define EXTRA_FIELD  (1 << 2) /* an extra field is present */
#define ORIG_NAME    (1 << 3) /* the original file name is present */
#define COMMENT      (1 << 4) /* a file comment is present */
#define RESERVED     (7 << 5) /* bits 5..7 are reserved */

#define DEF_MEM_LEVEL 8		/* memLevel handed to deflateInit2() */
#define OS_CODE 3		/* OS byte written into headers: unix */
87 
88 typedef
89 struct gz_stream {
90 	int	z_fd;		/* .gz file */
91 	z_stream z_stream;	/* libz stream */
92 	int	z_eof;		/* set if end of input file */
93 	u_char	z_buf[Z_BUFSIZE]; /* i/o buffer */
94 	u_int32_t z_time;	/* timestamp (mtime) */
95 	u_int32_t z_hlen;	/* length of the gz header */
96 	u_int32_t z_crc;	/* crc32 of uncompressed data */
97 	char	z_mode;		/* 'w' or 'r' */
98 
99 } gz_stream;
100 
101 static const u_char gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
102 
103 static int put_int32(gz_stream *, u_int32_t);
104 static u_int32_t get_int32(gz_stream *);
105 static int get_header(gz_stream *, char *, int);
106 static int put_header(gz_stream *, char *, u_int32_t, int);
107 static int get_byte(gz_stream *);
108 
109 void *
110 gz_open(int fd, const char *mode, char *name, int bits,
111     u_int32_t mtime, int gotmagic)
112 {
113 	gz_stream *s;
114 
115 	if (fd < 0 || !mode)
116 		return NULL;
117 
118 	if ((mode[0] != 'r' && mode[0] != 'w') || mode[1] != '\0' ||
119 	    bits < 0 || bits > Z_BEST_COMPRESSION) {
120 		errno = EINVAL;
121 		return NULL;
122 	}
123 	if ((s = (gz_stream *)calloc(1, sizeof(gz_stream))) == NULL)
124 		return NULL;
125 
126 	s->z_stream.zalloc = (alloc_func)0;
127 	s->z_stream.zfree = (free_func)0;
128 	s->z_stream.opaque = (voidpf)0;
129 	s->z_stream.next_in = Z_NULL;
130 	s->z_stream.next_out = Z_NULL;
131 	s->z_stream.avail_in = s->z_stream.avail_out = 0;
132 	s->z_fd = 0;
133 	s->z_eof = 0;
134 	s->z_time = 0;
135 	s->z_hlen = 0;
136 	s->z_crc = crc32(0L, Z_NULL, 0);
137 	s->z_mode = mode[0];
138 
139 	if (s->z_mode == 'w') {
140 #ifndef SMALL
141 		/* windowBits is passed < 0 to suppress zlib header */
142 		if (deflateInit2(&(s->z_stream), bits, Z_DEFLATED,
143 				 -MAX_WBITS, DEF_MEM_LEVEL, 0) != Z_OK) {
144 			free (s);
145 			return NULL;
146 		}
147 		s->z_stream.next_out = s->z_buf;
148 #else
149 		return (NULL);
150 #endif
151 	} else {
152 		if (inflateInit2(&(s->z_stream), -MAX_WBITS) != Z_OK) {
153 			free (s);
154 			return NULL;
155 		}
156 		s->z_stream.next_in = s->z_buf;
157 	}
158 	s->z_stream.avail_out = Z_BUFSIZE;
159 
160 	errno = 0;
161 	s->z_fd = fd;
162 
163 	if (s->z_mode == 'w') {
164 		/* write the .gz header */
165 		if (put_header(s, name, mtime, bits) != 0) {
166 			gz_close(s, NULL, NULL, NULL);
167 			s = NULL;
168 		}
169 	} else {
170 		/* read the .gz header */
171 		if (get_header(s, name, gotmagic) != 0) {
172 			gz_close(s, NULL, NULL, NULL);
173 			s = NULL;
174 		}
175 	}
176 
177 	return s;
178 }
179 
180 int
181 gz_close(void *cookie, struct z_info *info, const char *name, struct stat *sb)
182 {
183 	gz_stream *s = (gz_stream*)cookie;
184 	int err = 0;
185 
186 	if (s == NULL)
187 		return -1;
188 
189 #ifndef SMALL
190 	if (s->z_mode == 'w' && (err = gz_flush (s, Z_FINISH)) == Z_OK) {
191 		if ((err = put_int32 (s, s->z_crc)) == Z_OK) {
192 			s->z_hlen += sizeof(int32_t);
193 			if ((err = put_int32 (s, s->z_stream.total_in)) == Z_OK)
194 				s->z_hlen += sizeof(int32_t);
195 		}
196 	}
197 #endif
198 	if (!err && s->z_stream.state != NULL) {
199 		if (s->z_mode == 'w')
200 #ifndef SMALL
201 			err = deflateEnd(&s->z_stream);
202 #else
203 			err = -1;
204 #endif
205 		else if (s->z_mode == 'r')
206 			err = inflateEnd(&s->z_stream);
207 	}
208 
209 	if (info != NULL) {
210 		info->mtime = s->z_time;
211 		info->crc = s->z_crc;
212 		info->hlen = s->z_hlen;
213 		info->total_in = (off_t)s->z_stream.total_in;
214 		info->total_out = (off_t)s->z_stream.total_out;
215 	}
216 
217 	setfile(name, s->z_fd, sb);
218 	if (!err)
219 		err = close(s->z_fd);
220 	else
221 		(void)close(s->z_fd);
222 
223 	free(s);
224 
225 	return err;
226 }
227 
228 #ifndef SMALL
229 int
230 gz_flush(void *cookie, int flush)
231 {
232 	gz_stream *s = (gz_stream*)cookie;
233 	size_t len;
234 	int done = 0;
235 	int err;
236 
237 	if (s == NULL || s->z_mode != 'w') {
238 		errno = EBADF;
239 		return Z_ERRNO;
240 	}
241 
242 	s->z_stream.avail_in = 0; /* should be zero already anyway */
243 
244 	for (;;) {
245 		len = Z_BUFSIZE - s->z_stream.avail_out;
246 
247 		if (len != 0) {
248 			if (write(s->z_fd, s->z_buf, len) != len)
249 				return Z_ERRNO;
250 			s->z_stream.next_out = s->z_buf;
251 			s->z_stream.avail_out = Z_BUFSIZE;
252 		}
253 		if (done)
254 			break;
255 		if ((err = deflate(&(s->z_stream), flush)) != Z_OK &&
256 		    err != Z_STREAM_END)
257 			return err;
258 
259 		/* deflate has finished flushing only when it hasn't
260 		 * used up all the available space in the output buffer
261 		 */
262 		done = (s->z_stream.avail_out != 0 || err == Z_STREAM_END);
263 	}
264 	return 0;
265 }
266 #endif
267 
268 static int
269 put_int32(gz_stream *s, u_int32_t x)
270 {
271 	u_int32_t y = htole32(x);
272 
273 	if (write(s->z_fd, &y, sizeof(y)) != sizeof(y))
274 		return Z_ERRNO;
275 	return 0;
276 }
277 
278 static int
279 get_byte(gz_stream *s)
280 {
281 	if (s->z_eof)
282 		return EOF;
283 
284 	if (s->z_stream.avail_in == 0) {
285 		errno = 0;
286 		s->z_stream.avail_in = read(s->z_fd, s->z_buf, Z_BUFSIZE);
287 		if ((int)s->z_stream.avail_in <= 0) {
288 			s->z_eof = 1;
289 			return EOF;
290 		}
291 		s->z_stream.next_in = s->z_buf;
292 	}
293 	s->z_stream.avail_in--;
294 	return *s->z_stream.next_in++;
295 }
296 
297 static u_int32_t
298 get_int32(gz_stream *s)
299 {
300 	u_int32_t x;
301 
302 	x  = ((u_int32_t)(get_byte(s) & 0xff));
303 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<8;
304 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<16;
305 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<24;
306 	return x;
307 }
308 
309 static int
310 get_header(gz_stream *s, char *name, int gotmagic)
311 {
312 	int method; /* method byte */
313 	int flags;  /* flags byte */
314 	char *ep;
315 	uInt len;
316 	int c;
317 
318 	/* Check the gzip magic header */
319 	if (!gotmagic) {
320 		for (len = 0; len < 2; len++) {
321 			c = get_byte(s);
322 			if (c != gz_magic[len]) {
323 				errno = EFTYPE;
324 				return -1;
325 			}
326 		}
327 	}
328 
329 	method = get_byte(s);
330 	flags = get_byte(s);
331 	if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
332 		errno = EFTYPE;
333 		return -1;
334 	}
335 
336 	/* Stash timestamp (mtime) */
337 	s->z_time = get_int32(s);
338 
339 	/* Discard xflags and OS code */
340 	(void)get_byte(s);
341 	(void)get_byte(s);
342 
343 	s->z_hlen = 10; /* magic, method, flags, time, xflags, OS code */
344 	if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
345 		len  =  (uInt)get_byte(s);
346 		len += ((uInt)get_byte(s))<<8;
347 		s->z_hlen += 2;
348 		/* len is garbage if EOF but the loop below will quit anyway */
349 		while (len-- != 0 && get_byte(s) != EOF)
350 			s->z_hlen++;
351 	}
352 
353 	if ((flags & ORIG_NAME) != 0) { /* read/save the original file name */
354 		if ((ep = name) != NULL)
355 			ep += MAXPATHLEN - 1;
356 		while ((c = get_byte(s)) != EOF) {
357 			s->z_hlen++;
358 			if (c == '\0')
359 				break;
360 			if (name < ep)
361 				*name++ = c;
362 		}
363 		if (name != NULL)
364 			*name = '\0';
365 	}
366 
367 	if ((flags & COMMENT) != 0) {   /* skip the .gz file comment */
368 		while ((c = get_byte(s)) != EOF) {
369 			s->z_hlen++;
370 			if (c == '\0')
371 				break;
372 		}
373 	}
374 
375 	if ((flags & HEAD_CRC) != 0) {  /* skip the header crc */
376 		(void)get_byte(s);
377 		(void)get_byte(s);
378 		s->z_hlen += 2;
379 	}
380 
381 	if (s->z_eof) {
382 		errno = EFTYPE;
383 		return -1;
384 	}
385 
386 	return 0;
387 }
388 
389 static int
390 put_header(gz_stream *s, char *name, u_int32_t mtime, int bits)
391 {
392 	struct iovec iov[2];
393 	u_char buf[10];
394 
395 	buf[0] = gz_magic[0];
396 	buf[1] = gz_magic[1];
397 	buf[2] = Z_DEFLATED;
398 	buf[3] = name ? ORIG_NAME : 0;
399 	buf[4] = mtime & 0xff;
400 	buf[5] = (mtime >> 8) & 0xff;
401 	buf[6] = (mtime >> 16) & 0xff;
402 	buf[7] = (mtime >> 24) & 0xff;
403 	buf[8] = bits == 1 ? 4 : bits == 9 ? 2 : 0;	/* xflags */
404 	buf[9] = OS_CODE;
405 	iov[0].iov_base = buf;
406 	iov[0].iov_len = sizeof(buf);
407 	s->z_hlen = sizeof(buf);
408 
409 	if (name != NULL) {
410 		iov[1].iov_base = name;
411 		iov[1].iov_len = strlen(name) + 1;
412 		s->z_hlen += iov[1].iov_len;
413 	}
414 	if (writev(s->z_fd, iov, name ? 2 : 1) == -1)
415 		return (-1);
416 	return (0);
417 }
418 
419 int
420 gz_read(void *cookie, char *buf, int len)
421 {
422 	gz_stream *s = (gz_stream*)cookie;
423 	u_char *start = buf; /* starting point for crc computation */
424 	int error = Z_OK;
425 
426 	s->z_stream.next_out = buf;
427 	s->z_stream.avail_out = len;
428 
429 	while (error == Z_OK && !s->z_eof && s->z_stream.avail_out != 0) {
430 
431 		if (s->z_stream.avail_in == 0) {
432 
433 			errno = 0;
434 			s->z_stream.avail_in = read(s->z_fd, s->z_buf,
435 			    Z_BUFSIZE);
436 			if ((int)s->z_stream.avail_in <= 0)
437 				s->z_eof = 1;
438 			s->z_stream.next_in = s->z_buf;
439 		}
440 
441 		error = inflate(&(s->z_stream), Z_NO_FLUSH);
442 
443 		if (error == Z_DATA_ERROR) {
444 			errno = EINVAL;
445 			return -1;
446 		}
447 		if (error == Z_BUF_ERROR) {
448 			errno = EIO;
449 			return -1;
450 		}
451 		if (error == Z_STREAM_END) {
452 			/* Check CRC and original size */
453 			s->z_crc = crc32(s->z_crc, start,
454 			    (uInt)(s->z_stream.next_out - start));
455 			start = s->z_stream.next_out;
456 
457 			if (get_int32(s) != s->z_crc) {
458 				errno = EINVAL;
459 				return -1;
460 			}
461 			if (get_int32(s) != (u_int32_t)s->z_stream.total_out) {
462 				errno = EIO;
463 				return -1;
464 			}
465 			s->z_hlen += 2 * sizeof(int32_t);
466 			/* Check for the existence of an appended file. */
467 			if (get_header(s, NULL, 0) != 0) {
468 				s->z_eof = 1;
469 				break;
470 			}
471 			inflateReset(&(s->z_stream));
472 			s->z_crc = crc32(0L, Z_NULL, 0);
473 			error = Z_OK;
474 		}
475 	}
476 	s->z_crc = crc32(s->z_crc, start,
477 	    (uInt)(s->z_stream.next_out - start));
478 	len -= s->z_stream.avail_out;
479 
480 	return (len);
481 }
482 
483 int
484 gz_write(void *cookie, const char *buf, int len)
485 {
486 #ifndef SMALL
487 	gz_stream *s = (gz_stream*)cookie;
488 
489 	s->z_stream.next_in = (char *)buf;
490 	s->z_stream.avail_in = len;
491 
492 	while (s->z_stream.avail_in != 0) {
493 		if (s->z_stream.avail_out == 0) {
494 			if (write(s->z_fd, s->z_buf, Z_BUFSIZE) != Z_BUFSIZE)
495 				break;
496 			s->z_stream.next_out = s->z_buf;
497 			s->z_stream.avail_out = Z_BUFSIZE;
498 		}
499 		if (deflate(&(s->z_stream), Z_NO_FLUSH) != Z_OK)
500 			break;
501 	}
502 	s->z_crc = crc32(s->z_crc, buf, len);
503 
504 	return (int)(len - s->z_stream.avail_in);
505 #endif
506 }
507