xref: /openbsd-src/usr.bin/compress/gzopen.c (revision 8500990981f885cbe5e6a4958549cacc238b5ae6)
1 /*	$OpenBSD: gzopen.c,v 1.16 2003/11/21 21:54:46 millert Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 /* this is partially derived from the zlib's gzio.c file, so the notice: */
30 /*
31   zlib.h -- interface of the 'zlib' general purpose compression library
32   version 1.0.4, Jul 24th, 1996.
33 
34   Copyright (C) 1995-1996 Jean-loup Gailly and Mark Adler
35 
36   This software is provided 'as-is', without any express or implied
37   warranty.  In no event will the authors be held liable for any damages
38   arising from the use of this software.
39 
40   Permission is granted to anyone to use this software for any purpose,
41   including commercial applications, and to alter it and redistribute it
42   freely, subject to the following restrictions:
43 
44   1. The origin of this software must not be misrepresented; you must not
45      claim that you wrote the original software. If you use this software
46      in a product, an acknowledgment in the product documentation would be
47      appreciated but is not required.
48   2. Altered source versions must be plainly marked as such, and must not be
49      misrepresented as being the original software.
50   3. This notice may not be removed or altered from any source distribution.
51 
52   Jean-loup Gailly        Mark Adler
53   gzip@prep.ai.mit.edu    madler@alumni.caltech.edu
54 
55 
56   The data format used by the zlib library is described by RFCs (Request for
57   Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt
58   (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
59 */
60 
/* RCS identification string for this source file. */
const char gz_rcsid[] =
    "$OpenBSD: gzopen.c,v 1.16 2003/11/21 21:54:46 millert Exp $";
63 
64 #include <sys/param.h>
65 #include <sys/stat.h>
66 #include <sys/uio.h>
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <string.h>
70 #include <errno.h>
71 #include <unistd.h>
72 #include <zlib.h>
73 #include "compress.h"
74 
/*
 * gzip flag byte: bit masks tested against the fourth header byte in
 * get_header(); put_header() only ever sets ORIG_NAME.
 */
#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
#define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
#define COMMENT      0x10 /* bit 4 set: file comment present */
#define RESERVED     0xE0 /* bits 5..7: reserved */

/* memLevel argument handed to deflateInit2() when opening for writing */
#define DEF_MEM_LEVEL 8
/* value stored in the last byte of the fixed header by put_header() */
#define OS_CODE 0x03 /* unix */
85 
/*
 * Per-stream state behind the opaque cookie returned by gz_open().
 * A stream is either a reader or a writer for its whole lifetime
 * (see z_mode).
 */
typedef
struct gz_stream {
	int	z_fd;		/* .gz file */
	z_stream z_stream;	/* libz stream */
	int	z_eof;		/* set if end of input file */
	/*
	 * Holds compressed data only: deflate output waiting to be
	 * written in 'w' mode, bytes read from z_fd in 'r' mode.
	 */
	u_char	z_buf[Z_BUFSIZE]; /* i/o buffer */
	u_int32_t z_time;	/* timestamp (mtime) */
	/* also counts the 8 trailer bytes once they are read/written */
	u_int32_t z_hlen;	/* length of the gz header */
	u_int32_t z_crc;	/* crc32 of uncompressed data */
	char	z_mode;		/* 'w' or 'r' */

} gz_stream;
98 
/* The two bytes every gzip member starts with. */
static const u_char gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */

/* File-local helpers; all of them operate on an open gz_stream. */
static int put_int32(gz_stream *, u_int32_t);	/* write 32 bits, LSB first */
static u_int32_t get_int32(gz_stream *);	/* read 32 bits, LSB first */
static int get_header(gz_stream *, char *, int); /* parse a member header */
static int put_header(gz_stream *, char *, u_int32_t); /* emit a header */
static int get_byte(gz_stream *);	/* next input byte or EOF */
106 
107 void *
108 gz_open(int fd, const char *mode, char *name, int bits,
109     u_int32_t mtime, int gotmagic)
110 {
111 	gz_stream *s;
112 
113 	if (fd < 0 || !mode)
114 		return NULL;
115 
116 	if ((mode[0] != 'r' && mode[0] != 'w') || mode[1] != '\0' ||
117 	    bits < 0 || bits > Z_BEST_COMPRESSION) {
118 		errno = EINVAL;
119 		return NULL;
120 	}
121 	if ((s = (gz_stream *)calloc(1, sizeof(gz_stream))) == NULL)
122 		return NULL;
123 
124 	s->z_stream.zalloc = (alloc_func)0;
125 	s->z_stream.zfree = (free_func)0;
126 	s->z_stream.opaque = (voidpf)0;
127 	s->z_stream.next_in = Z_NULL;
128 	s->z_stream.next_out = Z_NULL;
129 	s->z_stream.avail_in = s->z_stream.avail_out = 0;
130 	s->z_fd = 0;
131 	s->z_eof = 0;
132 	s->z_time = 0;
133 	s->z_hlen = 0;
134 	s->z_crc = crc32(0L, Z_NULL, 0);
135 	s->z_mode = mode[0];
136 
137 	if (s->z_mode == 'w') {
138 		/* windowBits is passed < 0 to suppress zlib header */
139 		if (deflateInit2(&(s->z_stream), bits, Z_DEFLATED,
140 				 -MAX_WBITS, DEF_MEM_LEVEL, 0) != Z_OK) {
141 			free (s);
142 			return NULL;
143 		}
144 		s->z_stream.next_out = s->z_buf;
145 	} else {
146 		if (inflateInit2(&(s->z_stream), -MAX_WBITS) != Z_OK) {
147 			free (s);
148 			return NULL;
149 		}
150 		s->z_stream.next_in = s->z_buf;
151 	}
152 	s->z_stream.avail_out = Z_BUFSIZE;
153 
154 	errno = 0;
155 	s->z_fd = fd;
156 
157 	if (s->z_mode == 'w') {
158 		/* write the .gz header */
159 		if (put_header(s, name, mtime) != 0) {
160 			gz_close(s, NULL);
161 			s = NULL;
162 		}
163 	} else {
164 		/* read the .gz header */
165 		if (get_header(s, name, gotmagic) != 0) {
166 			gz_close(s, NULL);
167 			s = NULL;
168 		}
169 	}
170 
171 	return s;
172 }
173 
174 int
175 gz_close(void *cookie, struct z_info *info)
176 {
177 	gz_stream *s = (gz_stream*)cookie;
178 	int err = 0;
179 
180 	if (s == NULL)
181 		return -1;
182 
183 	if (s->z_mode == 'w' && (err = gz_flush (s, Z_FINISH)) == Z_OK) {
184 		if ((err = put_int32 (s, s->z_crc)) == Z_OK) {
185 			s->z_hlen += sizeof(int32_t);
186 			if ((err = put_int32 (s, s->z_stream.total_in)) == Z_OK)
187 				s->z_hlen += sizeof(int32_t);
188 		}
189 	}
190 
191 	if (!err && s->z_stream.state != NULL) {
192 		if (s->z_mode == 'w')
193 			err = deflateEnd(&s->z_stream);
194 		else if (s->z_mode == 'r')
195 			err = inflateEnd(&s->z_stream);
196 	}
197 
198 	if (info != NULL) {
199 		info->mtime = s->z_time;
200 		info->crc = s->z_crc;
201 		info->hlen = s->z_hlen;
202 		info->total_in = (off_t)s->z_stream.total_in;
203 		info->total_out = (off_t)s->z_stream.total_out;
204 	}
205 
206 	if (!err)
207 		err = close(s->z_fd);
208 	else
209 		(void)close(s->z_fd);
210 
211 	free(s);
212 
213 	return err;
214 }
215 
216 int
217 gz_flush(void *cookie, int flush)
218 {
219 	gz_stream *s = (gz_stream*)cookie;
220 	size_t len;
221 	int done = 0;
222 	int err;
223 
224 	if (s == NULL || s->z_mode != 'w') {
225 		errno = EBADF;
226 		return Z_ERRNO;
227 	}
228 
229 	s->z_stream.avail_in = 0; /* should be zero already anyway */
230 
231 	for (;;) {
232 		len = Z_BUFSIZE - s->z_stream.avail_out;
233 
234 		if (len != 0) {
235 			if (write(s->z_fd, s->z_buf, len) != len)
236 				return Z_ERRNO;
237 			s->z_stream.next_out = s->z_buf;
238 			s->z_stream.avail_out = Z_BUFSIZE;
239 		}
240 		if (done)
241 			break;
242 		if ((err = deflate(&(s->z_stream), flush)) != Z_OK &&
243 		    err != Z_STREAM_END)
244 			return err;
245 
246 		/* deflate has finished flushing only when it hasn't
247 		 * used up all the available space in the output buffer
248 		 */
249 		done = (s->z_stream.avail_out != 0 || err == Z_STREAM_END);
250 	}
251 	return 0;
252 }
253 
254 static int
255 put_int32(gz_stream *s, u_int32_t x)
256 {
257 	u_int32_t y = htole32(x);
258 
259 	if (write(s->z_fd, &y, sizeof(y)) != sizeof(y))
260 		return Z_ERRNO;
261 	return 0;
262 }
263 
264 static int
265 get_byte(gz_stream *s)
266 {
267 	if (s->z_eof)
268 		return EOF;
269 
270 	if (s->z_stream.avail_in == 0) {
271 		errno = 0;
272 		s->z_stream.avail_in = read(s->z_fd, s->z_buf, Z_BUFSIZE);
273 		if (s->z_stream.avail_in <= 0) {
274 			s->z_eof = 1;
275 			return EOF;
276 		}
277 		s->z_stream.next_in = s->z_buf;
278 	}
279 	s->z_stream.avail_in--;
280 	return *s->z_stream.next_in++;
281 }
282 
283 static u_int32_t
284 get_int32(gz_stream *s)
285 {
286 	u_int32_t x;
287 
288 	x  = ((u_int32_t)(get_byte(s) & 0xff));
289 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<8;
290 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<16;
291 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<24;
292 	return x;
293 }
294 
295 static int
296 get_header(gz_stream *s, char *name, int gotmagic)
297 {
298 	int method; /* method byte */
299 	int flags;  /* flags byte */
300 	char *ep;
301 	uInt len;
302 	int c;
303 
304 	/* Check the gzip magic header */
305 	if (!gotmagic) {
306 		for (len = 0; len < 2; len++) {
307 			c = get_byte(s);
308 			if (c != gz_magic[len]) {
309 				errno = EFTYPE;
310 				return -1;
311 			}
312 		}
313 	}
314 
315 	method = get_byte(s);
316 	flags = get_byte(s);
317 	if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
318 		errno = EFTYPE;
319 		return -1;
320 	}
321 
322 	/* Stash timestamp (mtime) */
323 	s->z_time = get_int32(s);
324 
325 	/* Discard xflags and OS code */
326 	(void)get_byte(s);
327 	(void)get_byte(s);
328 
329 	s->z_hlen = 10; /* magic, method, flags, time, xflags, OS code */
330 	if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
331 		len  =  (uInt)get_byte(s);
332 		len += ((uInt)get_byte(s))<<8;
333 		s->z_hlen += 2;
334 		/* len is garbage if EOF but the loop below will quit anyway */
335 		while (len-- != 0 && get_byte(s) != EOF)
336 			s->z_hlen++;
337 	}
338 
339 	if ((flags & ORIG_NAME) != 0) { /* read/save the original file name */
340 		if ((ep = name) != NULL)
341 			ep += MAXPATHLEN - 1;
342 		while ((c = get_byte(s)) != EOF) {
343 			s->z_hlen++;
344 			if (c == '\0')
345 				break;
346 			if (name < ep)
347 				*name++ = c;
348 		}
349 		if (name != NULL)
350 			*name = '\0';
351 	}
352 
353 	if ((flags & COMMENT) != 0) {   /* skip the .gz file comment */
354 		while ((c = get_byte(s)) != EOF) {
355 			s->z_hlen++;
356 			if (c == '\0')
357 				break;
358 		}
359 	}
360 
361 	if ((flags & HEAD_CRC) != 0) {  /* skip the header crc */
362 		(void)get_byte(s);
363 		(void)get_byte(s);
364 		s->z_hlen += 2;
365 	}
366 
367 	if (s->z_eof) {
368 		errno = EFTYPE;
369 		return -1;
370 	}
371 
372 	return 0;
373 }
374 
375 static int
376 put_header(gz_stream *s, char *name, u_int32_t mtime)
377 {
378 	struct iovec iov[2];
379 	u_char buf[10];
380 
381 	buf[0] = gz_magic[0];
382 	buf[1] = gz_magic[1];
383 	buf[2] = Z_DEFLATED;
384 	buf[3] = name ? ORIG_NAME : 0;
385 	buf[4] = mtime & 0xff;
386 	buf[5] = (mtime >> 8) & 0xff;
387 	buf[6] = (mtime >> 16) & 0xff;
388 	buf[7] = (mtime >> 24) & 0xff;
389 	buf[8] = 0 /* xflags */;
390 	buf[9] = OS_CODE;
391 	iov[0].iov_base = buf;
392 	iov[0].iov_len = sizeof(buf);
393 	s->z_hlen = sizeof(buf);
394 
395 	if (name != NULL) {
396 		iov[1].iov_base = name;
397 		iov[1].iov_len = strlen(name) + 1;
398 		s->z_hlen += iov[1].iov_len;
399 	}
400 	if (writev(s->z_fd, iov, name ? 2 : 1) == -1)
401 		return (-1);
402 	return (0);
403 }
404 
405 int
406 gz_read(void *cookie, char *buf, int len)
407 {
408 	gz_stream *s = (gz_stream*)cookie;
409 	u_char *start = buf; /* starting point for crc computation */
410 
411 	s->z_stream.next_out = buf;
412 	s->z_stream.avail_out = len;
413 
414 	while (s->z_stream.avail_out != 0 && !s->z_eof) {
415 
416 		if (s->z_stream.avail_in == 0) {
417 
418 			errno = 0;
419 			if ((s->z_stream.avail_in =
420 			    read(s->z_fd, s->z_buf, Z_BUFSIZE)) == 0)
421 				s->z_eof = 1;
422 			s->z_stream.next_in = s->z_buf;
423 		}
424 
425 		if (inflate(&(s->z_stream), Z_NO_FLUSH) == Z_STREAM_END) {
426 			/* Check CRC and original size */
427 			s->z_crc = crc32(s->z_crc, start,
428 			    (uInt)(s->z_stream.next_out - start));
429 			start = s->z_stream.next_out;
430 
431 			if (get_int32(s) != s->z_crc) {
432 				errno = EINVAL;
433 				return -1;
434 			}
435 			if (get_int32(s) != s->z_stream.total_out) {
436 				errno = EIO;
437 				return -1;
438 			}
439 			s->z_hlen += 2 * sizeof(int32_t);
440 			/* Check for the existence of an appended file. */
441 			if (get_header(s, NULL, 0) != 0) {
442 				s->z_eof = 1;
443 				break;
444 			}
445 			inflateReset(&(s->z_stream));
446 			s->z_crc = crc32(0L, Z_NULL, 0);
447 		}
448 	}
449 	s->z_crc = crc32(s->z_crc, start,
450 	    (uInt)(s->z_stream.next_out - start));
451 	len -= s->z_stream.avail_out;
452 
453 	return (len);
454 }
455 
456 int
457 gz_write(void *cookie, const char *buf, int len)
458 {
459 	gz_stream *s = (gz_stream*)cookie;
460 
461 	s->z_stream.next_in = (char *)buf;
462 	s->z_stream.avail_in = len;
463 
464 	while (s->z_stream.avail_in != 0) {
465 		if (s->z_stream.avail_out == 0) {
466 			if (write(s->z_fd, s->z_buf, Z_BUFSIZE) != Z_BUFSIZE)
467 				break;
468 			s->z_stream.next_out = s->z_buf;
469 			s->z_stream.avail_out = Z_BUFSIZE;
470 		}
471 		if (deflate(&(s->z_stream), Z_NO_FLUSH) != Z_OK)
472 			break;
473 	}
474 	s->z_crc = crc32(s->z_crc, buf, len);
475 
476 	return (int)(len - s->z_stream.avail_in);
477 }
478