xref: /netbsd-src/usr.bin/grep/util.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*	$NetBSD: util.c,v 1.6 2009/01/25 14:06:00 lukem Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 
30 #include <sys/cdefs.h>
31 #ifndef lint
32 __RCSID("$NetBSD: util.c,v 1.6 2009/01/25 14:06:00 lukem Exp $");
33 #endif /* not lint */
34 
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 
38 #include <ctype.h>
39 #include <zlib.h>
40 #include <err.h>
41 #include <errno.h>
42 #include <fts.h>
43 #include <regex.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 
49 #include "grep.h"
50 
51 /*
52  * Process a file line by line...
53  */
54 
55 static int linesqueued, newfile;
56 static int procline(str_t *l, int nottext);
57 
/*
 * Walk every file hierarchy named in argv and hand each entry that is not
 * a directory to procfile().
 *
 * argv is passed unchanged to fts_open().  The return value is the sum of
 * the procfile() results.  The process exits with status 2 when the
 * traversal cannot be opened or when fts reports FTS_ERR for an entry.
 */
int
grep_tree(char **argv)
{
	FTS *fts;
	FTSENT *p;
	int c = 0;

	/*
	 * The traversal is always logical (symbolic links are followed);
	 * selecting another link policy is not implemented here.
	 */
	fts = fts_open(argv, FTS_NOSTAT | FTS_NOCHDIR | FTS_LOGICAL, NULL);
	if (fts == NULL)
		err(2, NULL);

	while ((p = fts_read(fts)) != NULL) {
		switch (p->fts_info) {
		case FTS_DNR:
			/* Directory that could not be read: ignored. */
			break;
		case FTS_ERR:
			errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
			break;
		case FTS_DP:
		case FTS_D:
			/* Directories themselves are never searched. */
			break;
		case FTS_DC:
			warnx("warning: %s: recursive directory loop",
				p->fts_path);
			break;
		default:
			c += procfile(p->fts_path);
			break;
		}
	}

	/* Release the traversal state now that the walk is finished. */
	fts_close(fts);

	return c;
}
100 
101 int
102 procfile(const char *fn)
103 {
104 	str_t ln;
105 	file_t *f;
106 	struct stat sb;
107 	mode_t s;
108 	int c, t, z, nottext, skip;
109 
110 	tail = 0;
111 	newfile = 1;
112 
113 	if (fn == NULL) {
114 		fn = stdin_label;
115 		f = grep_fdopen(STDIN_FILENO, "r");
116 	} else {
117 		skip = 1;
118 		if (dirbehave == GREP_SKIP || devbehave == GREP_SKIP) {
119 			if (stat(fn, &sb)) {
120 				fprintf(stderr, "Cannot stat %s %d\n",
121 					fn, errno);
122 				/* XXX record error variable */
123 			} else {
124 				s = sb.st_mode & S_IFMT;
125 				if (s == S_IFDIR && dirbehave == GREP_SKIP)
126 					skip = 0;
127 				if (   (s == S_IFIFO || s == S_IFCHR ||
128 					s == S_IFBLK || s == S_IFSOCK)
129 					&& devbehave == GREP_SKIP)
130 							skip = 0;
131 			}
132 		}
133 		if (skip == 0)
134 			return 0;
135 
136 		f = grep_open(fn, "r");
137 	}
138 	if (f == NULL) {
139 		if (!sflag)
140 			warn("%s", fn);
141 		return 0;
142 	}
143 
144 	nottext = grep_bin_file(f);
145 
146 	if (nottext && binbehave == BIN_FILE_SKIP) {
147 		/* Skip this file as it is binary */
148 		grep_close(f);
149 		return 0;
150 	}
151 
152 	ln.file = fn;
153 	ln.line_no = 0;
154 	linesqueued = 0;
155 	ln.off = -1;
156 
157 	if (Bflag > 0)
158 		initqueue();
159 	for (c = 0; !(lflag && c);) {
160 		ln.off += ln.len + 1;
161 		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL)
162 			break;
163 		if (ln.len > 0 && ln.dat[ln.len - 1] == line_endchar)
164 			--ln.len;
165 		ln.line_no++;
166 
167 		z = tail;
168 
169 		if ((t = procline(&ln, nottext)) == 0 && Bflag > 0 && z == 0) {
170 			enqueue(&ln);
171 			linesqueued++;
172 		}
173 		c += t;
174 
175 		/* If we have a maximum number of matches, stop processing */
176 		if (mflag && c >= maxcount)
177 			break;
178 	}
179 	if (Bflag > 0)
180 		clearqueue();
181 	grep_close(f);
182 
183 	if (cflag) {
184 		if (output_filenames)
185 			printf("%s%c", ln.file, fn_colonchar);
186 		printf("%u\n", c);
187 	}
188 
189 	if (lflag && c != 0)
190 		printf("%s%c", fn, fn_endchar);
191 	if (Lflag && c == 0)
192 		printf("%s%c", fn, fn_endchar);
193 	if (c && !cflag && !lflag && !Lflag &&
194 		binbehave == BIN_FILE_BIN && nottext && !qflag)
195 			printf("Binary file %s matches\n", fn);
196 
197 	return c;
198 }
199 
200 
201 /*
202  * Process an individual line in a file. Return non-zero if it matches.
203  */
204 
205 #define isword(x) (isalnum((unsigned char)(x)) || (x) == '_')
206 
207 static int
208 procline(str_t *l, int nottext)
209 {
210 	regmatch_t pmatch;
211 	regmatch_t matches[MAX_LINE_MATCHES];
212 	int c = 0, i, r, t, m = 0;
213 	regoff_t st = 0;
214 
215 	if (matchall) {
216 		c = !vflag;
217 		goto print;
218 	}
219 
220 	t = vflag ? REG_NOMATCH : 0;
221 
222 	while (st >= 0 && (size_t)st <= l->len) {
223 		pmatch.rm_so = st;
224 		pmatch.rm_eo = l->len;
225 		for (i = 0; i < patterns; i++) {
226 			r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags);
227 			if (r == REG_NOMATCH && t == 0)
228 				continue;
229 			if (r == 0) {
230 				if (wflag) {
231 					if ((pmatch.rm_so != 0 && isword((unsigned char)l->dat[pmatch.rm_so - 1]))
232 					    || ((size_t)pmatch.rm_eo != l->len && isword((unsigned char)l->dat[pmatch.rm_eo])))
233 						r = REG_NOMATCH;
234 				}
235 				if (xflag) {
236 					if (pmatch.rm_so != 0 || (size_t)pmatch.rm_eo != l->len)
237 						r = REG_NOMATCH;
238 				}
239 			}
240 			if (r == t) {
241 				if (m == 0)
242 					c++;
243 				if (m < MAX_LINE_MATCHES) {
244 					matches[m] = pmatch;
245 					m++;
246 				}
247 				st = pmatch.rm_eo;
248 				break;
249 			}
250 		}
251 
252 		/* One pass if we are not recording matches */
253 		if (!oflag && !colours)
254 			break;
255 
256 		if (st == pmatch.rm_so)
257 			break; 	/* No matches */
258 
259 	}
260 
261 print:
262 
263 	if (c && binbehave == BIN_FILE_BIN && nottext)
264 		return c;	/* Binary file */
265 
266 	if ((tail > 0 || c) && !cflag && !qflag) {
267 		if (c) {
268 
269 			if ( (Aflag || Bflag) && first > 0 &&
270 			   ( (Bflag <= linesqueued && tail == 0) || newfile) )
271 						printf("--\n");
272 
273 			first = 1;
274 			newfile = 0;
275 			tail = Aflag;
276 			if (Bflag > 0)
277 				printqueue();
278 			linesqueued = 0;
279 			printline(l, fn_colonchar, matches, m);
280 		} else {
281 			printline(l, fn_dashchar, matches, m);
282 			tail--;
283 		}
284 
285 	}
286 	return c;
287 }
288 
/*
 * malloc() wrapper that never returns NULL: when the allocation fails the
 * process exits with status 2 through err().
 */
void *
grep_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(2, "malloc");
	return mem;
}
298 
/*
 * realloc() wrapper that never returns NULL: when the reallocation fails
 * the process exits with status 2 through err().
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *grown = realloc(ptr, size);

	if (grown == NULL)
		err(2, "realloc");
	return grown;
}
306 
307 void
308 printline(str_t *line, int sep, regmatch_t *matches, int m)
309 {
310 	int i, n = 0;
311 	size_t a = 0;
312 
313 	if (output_filenames) {
314 		fputs(line->file, stdout);
315 		++n;
316 	}
317 	if (nflag) {
318 		if (n)
319 			putchar(sep);
320 		printf("%d", line->line_no);
321 		++n;
322 	}
323 	if (bflag) {
324 		if (n)
325 			putchar(sep);
326 		printf("%lu", (unsigned long)line->off);
327 	}
328 	if (n)
329 		putchar(sep);
330 
331 	if ((oflag || colours) && m > 0) {
332 
333 		for (i = 0; i < m; i++) {
334 
335 			if (!oflag)
336 				fwrite(line->dat + a, matches[i].rm_so - a, 1, stdout);
337 
338 			if (colours)
339 				fprintf(stdout, "\33[%sm", grep_colour);
340 			fwrite(line->dat + matches[i].rm_so,
341 				matches[i].rm_eo - matches[i].rm_so, 1, stdout);
342 
343 			if (colours)
344 				fprintf(stdout, "\33[00m");
345 			a = matches[i].rm_eo;
346 			if (oflag)
347 				putchar('\n');
348 		}
349 		if (!oflag) {
350 			if (line->len - a > 0)
351 				fwrite(line->dat + a, line->len - a, 1, stdout);
352 			putchar('\n');
353 		}
354 
355 
356 	} else {
357 		fwrite(line->dat, line->len, 1, stdout);
358 		putchar(line_endchar);
359 	}
360 
361 }
362