xref: /openbsd-src/usr.bin/compress/gzopen.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: gzopen.c,v 1.1 1997/07/06 20:22:57 mickey Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by Michael Shalayeff.
18  * 4. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  */
/* This file is partially derived from zlib's gzio.c, hence the following notice: */
35 /*
36   zlib.h -- interface of the 'zlib' general purpose compression library
37   version 1.0.4, Jul 24th, 1996.
38 
39   Copyright (C) 1995-1996 Jean-loup Gailly and Mark Adler
40 
41   This software is provided 'as-is', without any express or implied
42   warranty.  In no event will the authors be held liable for any damages
43   arising from the use of this software.
44 
45   Permission is granted to anyone to use this software for any purpose,
46   including commercial applications, and to alter it and redistribute it
47   freely, subject to the following restrictions:
48 
49   1. The origin of this software must not be misrepresented; you must not
50      claim that you wrote the original software. If you use this software
51      in a product, an acknowledgment in the product documentation would be
52      appreciated but is not required.
53   2. Altered source versions must be plainly marked as such, and must not be
54      misrepresented as being the original software.
55   3. This notice may not be removed or altered from any source distribution.
56 
57   Jean-loup Gailly        Mark Adler
58   gzip@prep.ai.mit.edu    madler@alumni.caltech.edu
59 
60 
61   The data format used by the zlib library is described by RFCs (Request for
62   Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt
63   (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
64 */
65 
66 #include <sys/types.h>
67 #include <sys/stat.h>
68 #include <stdio.h>
69 #include <stdlib.h>
70 #include <errno.h>
71 #include <unistd.h>
72 #include <zlib.h>
73 #include "compress.h"
74 
/*
 * gzip flag byte: bit masks for the flags byte of the gzip header.
 * get_header() tests these to decide which optional header fields follow
 * and must be skipped.
 */
#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
#define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
#define COMMENT      0x10 /* bit 4 set: file comment present */
#define RESERVED     0xE0 /* bits 5..7: reserved; get_header() rejects files that set any */

#define DEF_MEM_LEVEL 8   /* memLevel argument given to deflateInit2() in gz_open() */
#define OS_CODE 0x03 /* unix; stored as the last byte of the header gz_open() writes */
85 
/*
 * State of one gzip stream.  gz_open() allocates it and hands it back to
 * the caller as an opaque cookie; gz_close() frees it.
 */
typedef
struct gz_stream {
	int	z_fd;		/* .gz file descriptor, supplied by the gz_open() caller */
	z_stream z_stream;	/* libz stream */
	int	z_eof;		/* set if end of input file (or once the trailer was checked) */
	u_char	z_buf[Z_BUFSIZE]; /* i/o buffer: compressed input in 'r', compressed output in 'w' */
	u_int32_t z_crc;	/* running crc32 of uncompressed data */
	char	z_mode;		/* 'w' or 'r' */

} gz_stream;
96 
/* The two bytes every gzip file starts with; also written by gz_open(). */
static u_char gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */

/* Helpers private to this file. */
/* Write a 32-bit value to the stream's file descriptor. */
static int put_int32 __P((register gz_stream *, u_int32_t));
/* Assemble a 32-bit value from four input bytes, lowest byte first. */
static u_int32_t get_int32 __P((register gz_stream *));
/* Validate and skip the gzip header; 0 on success, -1 on failure. */
static int get_header __P((register gz_stream *));
/* Return the next input byte, refilling the buffer as needed, or EOF. */
static int get_byte __P((register gz_stream *));
103 
104 int
105 gz_check_header(fd, sb, ofn)
106 	int fd;
107 	struct stat *sb;
108 	const char *ofn;
109 {
110 	int f;
111 	u_char buf[sizeof(gz_magic)];
112 	off_t off = lseek(fd, 0, SEEK_CUR);
113 
114 	f = (read(fd, buf, sizeof(buf)) == sizeof(buf) &&
115 	     !memcmp(buf, gz_magic, sizeof(buf)));
116 
117 	lseek (fd, off, SEEK_SET);
118 
119 	return f;
120 }
121 
122 void *
123 gz_open (fd, mode, bits)
124 	int fd;
125 	const char *mode;
126 	int  bits;
127 {
128 	gz_stream *s;
129 
130 	if (fd < 0 || !mode)
131 		return NULL;
132 
133 	if ((mode[0] != 'r' && mode[0] != 'w') || mode[1] != '\0' ||
134 	    bits < 0 || bits > Z_BEST_COMPRESSION) {
135 		errno = EINVAL;
136 		return NULL;
137 	}
138 	if ((s = (gz_stream *)calloc(sizeof(gz_stream), 1)) == NULL)
139 		return NULL;
140 
141 	s->z_stream.zalloc = (alloc_func)0;
142 	s->z_stream.zfree = (free_func)0;
143 	s->z_stream.opaque = (voidpf)0;
144 	s->z_stream.next_in = Z_NULL;
145 	s->z_stream.next_out = Z_NULL;
146 	s->z_stream.avail_in = s->z_stream.avail_out = 0;
147 	s->z_fd = 0;
148 	s->z_eof = 0;
149 	s->z_crc = crc32(0L, Z_NULL, 0);
150 	s->z_mode = mode[0];
151 
152 	if (s->z_mode == 'w') {
153 		/* windowBits is passed < 0 to suppress zlib header */
154 		if (deflateInit2(&(s->z_stream), bits, Z_DEFLATED,
155 				 -MAX_WBITS, DEF_MEM_LEVEL, 0) != Z_OK) {
156 			free (s);
157 			return NULL;
158 		}
159 		s->z_stream.next_out = s->z_buf;
160 	} else {
161 		if (inflateInit2(&(s->z_stream), -MAX_WBITS) != Z_OK) {
162 			free (s);
163 			return NULL;
164 		}
165 		s->z_stream.next_in = s->z_buf;
166 	}
167 	s->z_stream.avail_out = Z_BUFSIZE;
168 
169 	errno = 0;
170 	s->z_fd = fd;
171 
172 	if (s->z_mode == 'w') {
173 		u_char buf[10];
174 		/* Write a very simple .gz header: */
175 		buf[0] = gz_magic[0];
176 		buf[1] = gz_magic[1];
177 		buf[2] = Z_DEFLATED;
178 		buf[3] = 0 /*flags*/;
179 		buf[4] = buf[5] = buf[6] = buf[7] = 0 /*time*/;
180 		buf[8] = 0 /*xflags*/;
181 		buf[9] = OS_CODE;
182 		if (write(fd, buf, sizeof(buf)) != sizeof(buf)) {
183 			gz_close(s);
184 			s = NULL;
185 		}
186 	} else {
187 		if (get_header(s) != 0) { /* skip the .gz header */
188 			gz_close (s);
189 			s = NULL;
190 		}
191 	}
192 
193 	return s;
194 }
195 
196 int
197 gz_close (cookie)
198 	void *cookie;
199 {
200 	register gz_stream *s = (gz_stream*)cookie;
201 	int err = 0;
202 
203 	if (s == NULL)
204 		return -1;
205 
206 	if (s->z_mode == 'w' && (err = gz_flush (s, Z_FINISH)) == Z_OK) {
207 		if ((err = put_int32 (s, s->z_crc)) == Z_OK)
208 			err = put_int32 (s, s->z_stream.total_in);
209 	}
210 
211 	if (!err && s->z_stream.state != NULL) {
212 		if (s->z_mode == 'w')
213 			err = deflateEnd(&s->z_stream);
214 		else if (s->z_mode == 'r')
215 			err = inflateEnd(&s->z_stream);
216 	}
217 
218 	free(s);
219 
220 	return err;
221 }
222 
223 int
224 gz_flush (cookie, flush)
225     void *cookie;
226     int flush;
227 {
228 	register gz_stream *s = (gz_stream*)cookie;
229 	size_t len;
230 	int done = 0;
231 	int err;
232 
233 	if (s == NULL || s->z_mode != 'w') {
234 		errno = EBADF;
235 		return Z_ERRNO;
236 	}
237 
238 	s->z_stream.avail_in = 0; /* should be zero already anyway */
239 
240 	for (;;) {
241 		len = Z_BUFSIZE - s->z_stream.avail_out;
242 
243 		if (len != 0) {
244 			if (write(s->z_fd, s->z_buf, len) != len)
245 				return Z_ERRNO;
246 			s->z_stream.next_out = s->z_buf;
247 			s->z_stream.avail_out = Z_BUFSIZE;
248 		}
249 		if (done)
250 			break;
251 		if ((err = deflate(&(s->z_stream), flush)) != Z_OK &&
252 		    err != Z_STREAM_END)
253 			return err;
254 
255 		/* deflate has finished flushing only when it hasn't
256 		 * used up all the available space in the output buffer
257 		 */
258 		done = (s->z_stream.avail_out != 0 || err == Z_STREAM_END);
259 	}
260 	return 0;
261 }
262 
263 static int
264 put_int32 (s, x)
265 	register gz_stream *s;
266 	u_int32_t x;
267 {
268 	if (write(s->z_fd, &x, 1) != 1)
269 		return Z_ERRNO;
270 	x >>= 8;
271 	if (write(s->z_fd, &x, 1) != 1)
272 		return Z_ERRNO;
273 	x >>= 8;
274 	if (write(s->z_fd, &x, 1) != 1)
275 		return Z_ERRNO;
276 	x >>= 8;
277 	if (write(s->z_fd, &x, 1) != 1)
278 		return Z_ERRNO;
279 	return 0;
280 }
281 
282 static int
283 get_byte(s)
284 	register gz_stream *s;
285 {
286 	if (s->z_eof)
287 		return EOF;
288 
289 	if (s->z_stream.avail_in == 0) {
290 		errno = 0;
291 		s->z_stream.avail_in = read(s->z_fd, s->z_buf, Z_BUFSIZE);
292 		if (s->z_stream.avail_in <= 0) {
293 			s->z_eof = 1;
294 			return EOF;
295 		}
296 		s->z_stream.next_in = s->z_buf;
297 	}
298 	s->z_stream.avail_in--;
299 	return *s->z_stream.next_in++;
300 }
301 
302 static u_int32_t
303 get_int32 (s)
304 	register gz_stream *s;
305 {
306 	register u_int32_t x;
307 
308 	x  = ((u_int32_t)(get_byte(s) & 0xff));
309 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<8;
310 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<16;
311 	x |= ((u_int32_t)(get_byte(s) & 0xff))<<24;
312 	return x;
313 }
314 
315 static int
316 get_header(s)
317 	register gz_stream *s;
318 {
319 	int method; /* method byte */
320 	int flags;  /* flags byte */
321 	uInt len;
322 	int c;
323 
324 	/* Check the gzip magic header */
325 	for (len = 0; len < 2; len++) {
326 		c = get_byte(s);
327 		if (c != gz_magic[len]) {
328 			errno = EFTYPE;
329 			return -1;
330 		}
331 	}
332 
333 	method = get_byte(s);
334 	flags = get_byte(s);
335 	if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
336 		errno = EFTYPE;
337 		return -1;
338 	}
339 
340 	/* Discard time, xflags and OS code: */
341 	for (len = 0; len < 6; len++)
342 		(void)get_byte(s);
343 
344 	if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
345 		len  =  (uInt)get_byte(s);
346 		len += ((uInt)get_byte(s))<<8;
347 		/* len is garbage if EOF but the loop below will quit anyway */
348 		while (len-- != 0 && get_byte(s) != EOF)
349 			;
350 	}
351 
352 	if ((flags & ORIG_NAME) != 0) { /* skip the original file name */
353 		while ((c = get_byte(s)) != 0 && c != EOF) ;
354 	}
355 
356 	if ((flags & COMMENT) != 0) {   /* skip the .gz file comment */
357 		while ((c = get_byte(s)) != 0 && c != EOF) ;
358 	}
359 
360 	if ((flags & HEAD_CRC) != 0) {  /* skip the header crc */
361 		for (len = 0; len < 2; len++) (void)get_byte(s);
362 	}
363 
364 	if (s->z_eof) {
365 		errno = EFTYPE;
366 		return -1;
367 	}
368 
369 	return 0;
370 }
371 
372 int
373 gz_read(cookie, buf, len)
374 	void *cookie;
375 	char *buf;
376 	int len;
377 {
378 	register gz_stream *s = (gz_stream*)cookie;
379 	u_char *start = buf; /* starting point for crc computation */
380 
381 	s->z_stream.next_out = buf;
382 	s->z_stream.avail_out = len;
383 
384 	while (s->z_stream.avail_out != 0 && !s->z_eof) {
385 
386 		if (s->z_stream.avail_in == 0) {
387 
388 			errno = 0;
389 			if ((s->z_stream.avail_in =
390 			     read(s->z_fd, s->z_buf, Z_BUFSIZE)) == 0)
391 				s->z_eof = 1;
392 			s->z_stream.next_in = s->z_buf;
393 		}
394 
395 		if (inflate(&(s->z_stream), Z_NO_FLUSH) == Z_STREAM_END) {
396 			/* Check CRC and original size */
397 			s->z_crc = crc32(s->z_crc, start,
398 				       (uInt)(s->z_stream.next_out - start));
399 			start = s->z_stream.next_out;
400 
401 			if (get_int32(s) != s->z_crc ||
402 			    get_int32(s) != s->z_stream.total_out) {
403 			        errno = EIO;
404 				return -1;
405 			}
406 			s->z_eof = 1;
407 			break;
408 		}
409 	}
410 	s->z_crc = crc32(s->z_crc, start,
411 			 (uInt)(s->z_stream.next_out - start));
412 
413 	return (int)(len - s->z_stream.avail_out);
414 }
415 
416 int
417 gz_write(cookie, buf, len)
418 	void *cookie;
419 	const char *buf;
420 	int len;
421 {
422 	register gz_stream *s = (gz_stream*)cookie;
423 
424 	s->z_stream.next_in = (char *)buf;
425 	s->z_stream.avail_in = len;
426 
427 	while (s->z_stream.avail_in != 0) {
428 
429 		if (s->z_stream.avail_out == 0) {
430 
431 			if (write(s->z_fd, s->z_buf, Z_BUFSIZE) != Z_BUFSIZE)
432 				break;
433 			s->z_stream.next_out = s->z_buf;
434 			s->z_stream.avail_out = Z_BUFSIZE;
435 		}
436 		if (deflate(&(s->z_stream), Z_NO_FLUSH) != Z_OK)
437 			break;
438 	}
439 	s->z_crc = crc32(s->z_crc, buf, len);
440 
441 	return (int)(len - s->z_stream.avail_in);
442 }
443 
444