xref: /netbsd-src/usr.bin/grep/file.c (revision 411775685fcb87110d1ee4eaaca4aad4fefcfd28)
1 /*	$NetBSD: file.c,v 1.13 2024/08/14 05:02:19 rin Exp $	*/
2 /*	$FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $	*/
3 /*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
4 
5 /*-
6  * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7  * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8  * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #if HAVE_NBTOOL_CONFIG_H
34 #include "nbtool_config.h"
35 #endif
36 
37 #include <sys/cdefs.h>
38 __RCSID("$NetBSD: file.c,v 1.13 2024/08/14 05:02:19 rin Exp $");
39 
40 #include <sys/param.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 
44 #include <err.h>
45 #include <errno.h>
46 #include <fcntl.h>
47 #include <stddef.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <wchar.h>
52 #include <wctype.h>
53 
54 #include "grep.h"
55 
/* Maximum number of bytes requested from the input per refill. */
#define	MAXBUFSIZ	(32 * 1024)
/* Slack added when the line buffer is grown for a line spanning refills. */
#define	LNBUFBUMP	80

/*
 * Decompression handles, assigned in grep_file_init() when filebehave
 * selects gzip or bzip2 input.  They are file-scope, so only one input
 * file is handled at a time.
 */
#ifndef WITHOUT_GZIP
static gzFile gzbufdesc;
#endif
#ifndef WITHOUT_BZ2
static BZFILE* bzbufdesc;
#endif

/*
 * Shared read buffer: bufpos points at the next unconsumed byte and
 * bufrem counts the unconsumed bytes that follow it.
 */
static unsigned char buffer[MAXBUFSIZ + 1];
static unsigned char *bufpos;
static size_t bufrem;

/*
 * Line buffer handed back by grep_fgetln(); lnbuflen is its current
 * allocated size.  It is grown on demand and freed in grep_close().
 */
static unsigned char *lnbuf;
static size_t lnbuflen;
72 
73 static inline int
74 grep_refill(struct file *f)
75 {
76 	ssize_t nr = -1;
77 #ifndef WITHOUT_BZ2
78 	int bzerr;
79 #endif
80 
81 	bufpos = buffer;
82 	bufrem = 0;
83 
84 #ifndef WITHOUT_GZIP
85 	if (filebehave == FILE_GZIP) {
86 		nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
87 		if (nr == -1)
88 			return -1;
89 	}
90 #endif
91 #ifndef WITHOUT_BZ2
92 	if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
93 		nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
94 		switch (bzerr) {
95 		case BZ_OK:
96 		case BZ_STREAM_END:
97 			/* No problem, nr will be okay */
98 			break;
99 		case BZ_DATA_ERROR_MAGIC:
100 			/*
101 			 * As opposed to gzread(), which simply returns the
102 			 * plain file data, if it is not in the correct
103 			 * compressed format, BZ2_bzRead() instead aborts.
104 			 *
105 			 * So, just restart at the beginning of the file again,
106 			 * and use plain reads from now on.
107 			 */
108 			BZ2_bzReadClose(&bzerr, bzbufdesc);
109 			bzbufdesc = NULL;
110 			if (lseek(f->fd, 0, SEEK_SET) == -1)
111 				return (-1);
112 			nr = read(f->fd, buffer, MAXBUFSIZ);
113 			break;
114 		default:
115 			/* Make sure we exit with an error */
116 			nr = -1;
117 		}
118 		if (nr == -1)
119 			return -1;
120 	}
121 #endif
122 	if (nr == -1) {
123 		nr = read(f->fd, buffer, MAXBUFSIZ);
124 	}
125 
126 	if (nr < 0)
127 		return (-1);
128 
129 	bufrem = nr;
130 	return (0);
131 }
132 
133 static inline void
134 grep_lnbufgrow(size_t newlen)
135 {
136 
137 	if (lnbuflen < newlen) {
138 		lnbuf = grep_realloc(lnbuf, newlen);
139 		lnbuflen = newlen;
140 	}
141 }
142 
143 static void
144 grep_copyline(size_t off, size_t len)
145 {
146 	memcpy(lnbuf + off, bufpos, len);
147 	lnbuf[off + len] = '\0';
148 }
149 
150 char *
151 grep_fgetln(struct file *f, size_t *lenp)
152 {
153 	unsigned char *p;
154 	size_t len;
155 	size_t off;
156 	ptrdiff_t diff;
157 
158 	/* Fill the buffer, if necessary */
159 	if (bufrem == 0 && grep_refill(f) != 0)
160 		goto error;
161 
162 	if (bufrem == 0) {
163 		/* Return zero length to indicate EOF */
164 		*lenp = 0;
165 		return ((char *)bufpos);
166 	}
167 
168 	/* Look for a newline in the remaining part of the buffer */
169 	if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
170 		++p; /* advance over newline */
171 		len = p - bufpos;
172 		grep_lnbufgrow(len + 1);
173 		grep_copyline(0, len);
174 		*lenp = len;
175 		bufrem -= len;
176 		bufpos = p;
177 		return (char *)lnbuf;
178 	}
179 
180 	/* We have to copy the current buffered data to the line buffer */
181 	for (len = bufrem, off = 0; ; len += bufrem) {
182 		/* Make sure there is room for more data */
183 		grep_lnbufgrow(len + LNBUFBUMP);
184 		grep_copyline(off, len - off);
185 		off = len;
186 		if (grep_refill(f) != 0)
187 			goto error;
188 		if (bufrem == 0)
189 			/* EOF: return partial line */
190 			break;
191 		if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
192 			continue;
193 		/* got it: finish up the line (like code above) */
194 		++p;
195 		diff = p - bufpos;
196 		len += diff;
197 		grep_lnbufgrow(len + 1);
198 		grep_copyline(off, diff);
199 		bufrem -= diff;
200 		bufpos = p;
201 		break;
202 	}
203 	*lenp = len;
204 	return ((char *)lnbuf);
205 
206 error:
207 	*lenp = 0;
208 	return (NULL);
209 }
210 
211 static inline struct file *
212 grep_file_init(struct file *f)
213 {
214 
215 #ifndef WITHOUT_GZIP
216 	if (filebehave == FILE_GZIP &&
217 	    (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
218 		goto error;
219 #endif
220 
221 #ifndef WITHOUT_BZ2
222 	if (filebehave == FILE_BZIP &&
223 	    (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
224 		goto error;
225 #endif
226 
227 	/* Fill read buffer, also catches errors early */
228 	if (grep_refill(f) != 0)
229 		goto error;
230 
231 	/* Check for binary stuff, if necessary */
232 	if (!nulldataflag && binbehave != BINFILE_TEXT &&
233 	    memchr(bufpos, '\0', bufrem) != NULL)
234 		f->binary = true;
235 
236 	return (f);
237 error:
238 	close(f->fd);
239 	free(f);
240 	return (NULL);
241 }
242 
243 /*
244  * Opens a file for processing.
245  */
246 struct file *
247 grep_open(const char *path)
248 {
249 	struct file *f;
250 
251 	f = grep_malloc(sizeof *f);
252 	memset(f, 0, sizeof *f);
253 	if (path == NULL) {
254 		/* Processing stdin implies --line-buffered. */
255 		lbflag = true;
256 		f->fd = STDIN_FILENO;
257 	} else if ((f->fd = open(path, O_RDONLY)) == -1) {
258 		free(f);
259 		return (NULL);
260 	}
261 
262 	return (grep_file_init(f));
263 }
264 
265 /*
266  * Closes a file.
267  */
268 void
269 grep_close(struct file *f)
270 {
271 
272 	close(f->fd);
273 
274 	/* Reset read buffer and line buffer */
275 	bufpos = buffer;
276 	bufrem = 0;
277 
278 	free(lnbuf);
279 	lnbuf = NULL;
280 	lnbuflen = 0;
281 }
282