xref: /netbsd-src/usr.bin/grep/file.c (revision 6cf6fe02a981b55727c49c3d37b0d8191a98c0ee)
1 /*	$NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $	*/
2 /*	$FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $	*/
3 /*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
4 
5 /*-
6  * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7  * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8  * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #if HAVE_NBTOOL_CONFIG_H
34 #include "nbtool_config.h"
35 #endif
36 
37 #include <sys/cdefs.h>
38 __RCSID("$NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $");
39 
40 #include <sys/param.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 
44 #include <bzlib.h>
45 #include <err.h>
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <stddef.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <unistd.h>
52 #include <wchar.h>
53 #include <wctype.h>
54 #include <zlib.h>
55 
56 #include "grep.h"
57 
58 #define	MAXBUFSIZ	(32 * 1024)	/* size in bytes of the static read buffer below */
59 #define	LNBUFBUMP	80		/* slack added when grep_fgetln() grows the line buffer */
60 
61 static gzFile gzbufdesc;	/* zlib handle, assigned by gzdopen() in grep_file_init() */
62 static BZFILE* bzbufdesc;	/* bzip2 handle; set to NULL when grep_refill() falls back to plain reads */
63 
64 static unsigned char buffer[MAXBUFSIZ];	/* read buffer, filled by grep_refill() */
65 static unsigned char *bufpos;	/* next unconsumed byte inside buffer */
66 static size_t bufrem;	/* count of unconsumed bytes starting at bufpos */
67 
68 static unsigned char *lnbuf;	/* line buffer for lines that span more than one refill */
69 static size_t lnbuflen;	/* current allocated size of lnbuf, in bytes */
70 
71 static inline int
72 grep_refill(struct file *f)
73 {
74 	ssize_t nr;
75 	int bzerr;
76 
77 	bufpos = buffer;
78 	bufrem = 0;
79 
80 	if (filebehave == FILE_GZIP)
81 		nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
82 	else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
83 		nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
84 		switch (bzerr) {
85 		case BZ_OK:
86 		case BZ_STREAM_END:
87 			/* No problem, nr will be okay */
88 			break;
89 		case BZ_DATA_ERROR_MAGIC:
90 			/*
91 			 * As opposed to gzread(), which simply returns the
92 			 * plain file data, if it is not in the correct
93 			 * compressed format, BZ2_bzRead() instead aborts.
94 			 *
95 			 * So, just restart at the beginning of the file again,
96 			 * and use plain reads from now on.
97 			 */
98 			BZ2_bzReadClose(&bzerr, bzbufdesc);
99 			bzbufdesc = NULL;
100 			if (lseek(f->fd, 0, SEEK_SET) == -1)
101 				return (-1);
102 			nr = read(f->fd, buffer, MAXBUFSIZ);
103 			break;
104 		default:
105 			/* Make sure we exit with an error */
106 			nr = -1;
107 		}
108 	} else
109 		nr = read(f->fd, buffer, MAXBUFSIZ);
110 
111 	if (nr < 0)
112 		return (-1);
113 
114 	bufrem = nr;
115 	return (0);
116 }
117 
118 static inline int
119 grep_lnbufgrow(size_t newlen)
120 {
121 
122 	if (lnbuflen < newlen) {
123 		lnbuf = grep_realloc(lnbuf, newlen);
124 		lnbuflen = newlen;
125 	}
126 
127 	return (0);
128 }
129 
130 char *
131 grep_fgetln(struct file *f, size_t *lenp)
132 {
133 	unsigned char *p;
134 	char *ret;
135 	size_t len;
136 	size_t off;
137 	ptrdiff_t diff;
138 
139 	/* Fill the buffer, if necessary */
140 	if (bufrem == 0 && grep_refill(f) != 0)
141 		goto error;
142 
143 	if (bufrem == 0) {
144 		/* Return zero length to indicate EOF */
145 		*lenp = 0;
146 		return ((char *)bufpos);
147 	}
148 
149 	/* Look for a newline in the remaining part of the buffer */
150 	if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
151 		++p; /* advance over newline */
152 		ret = (char *)bufpos;
153 		len = p - bufpos;
154 		bufrem -= len;
155 		bufpos = p;
156 		*lenp = len;
157 		return (ret);
158 	}
159 
160 	/* We have to copy the current buffered data to the line buffer */
161 	for (len = bufrem, off = 0; ; len += bufrem) {
162 		/* Make sure there is room for more data */
163 		if (grep_lnbufgrow(len + LNBUFBUMP))
164 			goto error;
165 		memcpy(lnbuf + off, bufpos, len - off);
166 		off = len;
167 		if (grep_refill(f) != 0)
168 			goto error;
169 		if (bufrem == 0)
170 			/* EOF: return partial line */
171 			break;
172 		if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
173 			continue;
174 		/* got it: finish up the line (like code above) */
175 		++p;
176 		diff = p - bufpos;
177 		len += diff;
178 		if (grep_lnbufgrow(len))
179 		    goto error;
180 		memcpy(lnbuf + off, bufpos, diff);
181 		bufrem -= diff;
182 		bufpos = p;
183 		break;
184 	}
185 	*lenp = len;
186 	return ((char *)lnbuf);
187 
188 error:
189 	*lenp = 0;
190 	return (NULL);
191 }
192 
193 static inline struct file *
194 grep_file_init(struct file *f)
195 {
196 
197 	if (filebehave == FILE_GZIP &&
198 	    (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
199 		goto error;
200 
201 	if (filebehave == FILE_BZIP &&
202 	    (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
203 		goto error;
204 
205 	/* Fill read buffer, also catches errors early */
206 	if (grep_refill(f) != 0)
207 		goto error;
208 
209 	/* Check for binary stuff, if necessary */
210 	if (!nulldataflag && binbehave != BINFILE_TEXT &&
211 	    memchr(bufpos, '\0', bufrem) != NULL)
212 		f->binary = true;
213 
214 	return (f);
215 error:
216 	close(f->fd);
217 	free(f);
218 	return (NULL);
219 }
220 
221 /*
222  * Opens a file for processing.
223  */
224 struct file *
225 grep_open(const char *path)
226 {
227 	struct file *f;
228 
229 	f = grep_malloc(sizeof *f);
230 	memset(f, 0, sizeof *f);
231 	if (path == NULL) {
232 		/* Processing stdin implies --line-buffered. */
233 		lbflag = true;
234 		f->fd = STDIN_FILENO;
235 	} else if ((f->fd = open(path, O_RDONLY)) == -1) {
236 		free(f);
237 		return (NULL);
238 	}
239 
240 	return (grep_file_init(f));
241 }
242 
243 /*
244  * Closes a file.
245  */
246 void
247 grep_close(struct file *f)
248 {
249 
250 	close(f->fd);
251 
252 	/* Reset read buffer and line buffer */
253 	bufpos = buffer;
254 	bufrem = 0;
255 
256 	free(lnbuf);
257 	lnbuf = NULL;
258 	lnbuflen = 0;
259 }
260