xref: /openbsd-src/usr.bin/grep/util.c (revision 19f8bc08c6facb51ffc96c95cef5f64b07da4cfe)
1*19f8bc08Sotto /*	$OpenBSD: util.c,v 1.55 2016/04/04 05:49:47 otto Exp $	*/
2fd6e2b5bSderaadt 
3fe07e37bSderaadt /*-
4fe07e37bSderaadt  * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav
5fe07e37bSderaadt  * All rights reserved.
6fe07e37bSderaadt  *
7fe07e37bSderaadt  * Redistribution and use in source and binary forms, with or without
8fe07e37bSderaadt  * modification, are permitted provided that the following conditions
9fe07e37bSderaadt  * are met:
10fe07e37bSderaadt  * 1. Redistributions of source code must retain the above copyright
11fe07e37bSderaadt  *    notice, this list of conditions and the following disclaimer.
12fe07e37bSderaadt  * 2. Redistributions in binary form must reproduce the above copyright
13fe07e37bSderaadt  *    notice, this list of conditions and the following disclaimer in the
14fe07e37bSderaadt  *    documentation and/or other materials provided with the distribution.
15fe07e37bSderaadt  *
16fe07e37bSderaadt  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17fe07e37bSderaadt  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18fe07e37bSderaadt  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19fe07e37bSderaadt  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20fe07e37bSderaadt  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21fe07e37bSderaadt  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22fe07e37bSderaadt  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23fe07e37bSderaadt  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24fe07e37bSderaadt  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25fe07e37bSderaadt  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26fe07e37bSderaadt  * SUCH DAMAGE.
27fe07e37bSderaadt  */
28fe07e37bSderaadt 
29fe07e37bSderaadt #include <sys/types.h>
30fe07e37bSderaadt #include <sys/stat.h>
31fe07e37bSderaadt 
32fe07e37bSderaadt #include <ctype.h>
33fe07e37bSderaadt #include <err.h>
34fe07e37bSderaadt #include <errno.h>
35fe07e37bSderaadt #include <fts.h>
36fe07e37bSderaadt #include <regex.h>
3757c37fdaSmillert #include <stdbool.h>
38fe07e37bSderaadt #include <stdio.h>
39fe07e37bSderaadt #include <stdlib.h>
40fe07e37bSderaadt #include <string.h>
41fe07e37bSderaadt #include <unistd.h>
42fe07e37bSderaadt #include <zlib.h>
43fe07e37bSderaadt 
44fe07e37bSderaadt #include "grep.h"
45fe07e37bSderaadt 
46fe07e37bSderaadt /*
47fe07e37bSderaadt  * Process a file line by line...
48fe07e37bSderaadt  */
49fe07e37bSderaadt 
50fe07e37bSderaadt static int	linesqueued;
51f057df86Stedu static int	procline(str_t *l, int);
5214774268Stedu static int	grep_search(fastgrep_t *, char *, size_t, regmatch_t *pmatch);
53f56cb323Stedu #ifndef SMALL
5457c37fdaSmillert static bool	grep_cmp(const char *, const char *, size_t);
5570c36c4eStedu static void	grep_revstr(unsigned char *, int);
56f56cb323Stedu #endif
57fe07e37bSderaadt 
58fe07e37bSderaadt int
59fe07e37bSderaadt grep_tree(char **argv)
60fe07e37bSderaadt {
61fe07e37bSderaadt 	FTS	*fts;
62fe07e37bSderaadt 	FTSENT	*p;
63fe07e37bSderaadt 	int	c, fts_flags;
64fe07e37bSderaadt 
6540155c30Stedu 	c = 0;
66fe07e37bSderaadt 
6740155c30Stedu 	fts_flags = FTS_PHYSICAL | FTS_NOSTAT | FTS_NOCHDIR;
68fe07e37bSderaadt 
690536143aSmillert 	if (!(fts = fts_open(argv, fts_flags, NULL)))
70f91b5226Smillert 		err(2, NULL);
71fe07e37bSderaadt 	while ((p = fts_read(fts)) != NULL) {
72fe07e37bSderaadt 		switch (p->fts_info) {
73fe07e37bSderaadt 		case FTS_DNR:
74fe07e37bSderaadt 			break;
75fe07e37bSderaadt 		case FTS_ERR:
7620351096Smillert 			file_err = 1;
7720351096Smillert 			if(!sflag)
785ad04d35Sguenther 				warnc(p->fts_errno, "%s", p->fts_path);
79fe07e37bSderaadt 			break;
80fe07e37bSderaadt 		case FTS_DP:
81fe07e37bSderaadt 			break;
82fe07e37bSderaadt 		default:
83fe07e37bSderaadt 			c += procfile(p->fts_path);
84fe07e37bSderaadt 			break;
85fe07e37bSderaadt 		}
86fe07e37bSderaadt 	}
874760cb14Sotto 	if (errno)
884760cb14Sotto 		err(2, "fts_read");
89f12e5d26Suebayasi 	fts_close(fts);
90fe07e37bSderaadt 	return c;
91fe07e37bSderaadt }
92fe07e37bSderaadt 
93fe07e37bSderaadt int
94fe07e37bSderaadt procfile(char *fn)
95fe07e37bSderaadt {
96fe07e37bSderaadt 	str_t ln;
97fe07e37bSderaadt 	file_t *f;
98f057df86Stedu 	int c, t, z, nottext;
99fe07e37bSderaadt 
100fe07e37bSderaadt 	if (fn == NULL) {
101fe07e37bSderaadt 		fn = "(standard input)";
102fe07e37bSderaadt 		f = grep_fdopen(STDIN_FILENO, "r");
103fe07e37bSderaadt 	} else {
104fe07e37bSderaadt 		f = grep_open(fn, "r");
105fe07e37bSderaadt 	}
106fe07e37bSderaadt 	if (f == NULL) {
10720351096Smillert 		file_err = 1;
108fe07e37bSderaadt 		if (!sflag)
109fe07e37bSderaadt 			warn("%s", fn);
110fe07e37bSderaadt 		return 0;
111fe07e37bSderaadt 	}
112f057df86Stedu 
113f057df86Stedu 	nottext = grep_bin_file(f);
114f057df86Stedu 	if (nottext && binbehave == BIN_FILE_SKIP) {
115fe07e37bSderaadt 		grep_close(f);
116fe07e37bSderaadt 		return 0;
117fe07e37bSderaadt 	}
118fe07e37bSderaadt 
119fe07e37bSderaadt 	ln.file = fn;
120fe07e37bSderaadt 	ln.line_no = 0;
1211d5054b0Sespie 	ln.len = 0;
122fe07e37bSderaadt 	linesqueued = 0;
1232eb140a3Sjaredy 	tail = 0;
124fe07e37bSderaadt 	ln.off = -1;
125fe07e37bSderaadt 
126fe07e37bSderaadt 	if (Bflag > 0)
127fe07e37bSderaadt 		initqueue();
12899210d50Sotto 	for (c = 0;  c == 0 || !(lflag || qflag); ) {
129fe07e37bSderaadt 		ln.off += ln.len + 1;
130fe07e37bSderaadt 		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL)
131fe07e37bSderaadt 			break;
132fe07e37bSderaadt 		if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
133fe07e37bSderaadt 			--ln.len;
134fe07e37bSderaadt 		ln.line_no++;
135fe07e37bSderaadt 
136fe07e37bSderaadt 		z = tail;
137fe07e37bSderaadt 
138f057df86Stedu 		if ((t = procline(&ln, nottext)) == 0 && Bflag > 0 && z == 0) {
139fe07e37bSderaadt 			enqueue(&ln);
140fe07e37bSderaadt 			linesqueued++;
141fe07e37bSderaadt 		}
142fe07e37bSderaadt 		c += t;
143fe07e37bSderaadt 	}
144fe07e37bSderaadt 	if (Bflag > 0)
145fe07e37bSderaadt 		clearqueue();
146fe07e37bSderaadt 	grep_close(f);
147fe07e37bSderaadt 
148fe07e37bSderaadt 	if (cflag) {
149fe07e37bSderaadt 		if (!hflag)
150fe07e37bSderaadt 			printf("%s:", ln.file);
151fe07e37bSderaadt 		printf("%u\n", c);
152fe07e37bSderaadt 	}
153fe07e37bSderaadt 	if (lflag && c != 0)
154fe07e37bSderaadt 		printf("%s\n", fn);
155fe07e37bSderaadt 	if (Lflag && c == 0)
156fe07e37bSderaadt 		printf("%s\n", fn);
157f057df86Stedu 	if (c && !cflag && !lflag && !Lflag &&
1580c0c855bStedu 	    binbehave == BIN_FILE_BIN && nottext && !qflag)
159f057df86Stedu 		printf("Binary file %s matches\n", fn);
160f057df86Stedu 
161fe07e37bSderaadt 	return c;
162fe07e37bSderaadt }
163fe07e37bSderaadt 
164fe07e37bSderaadt 
165fe07e37bSderaadt /*
166fe07e37bSderaadt  * Process an individual line in a file. Return non-zero if it matches.
167fe07e37bSderaadt  */
168fe07e37bSderaadt 
1696cd4fad2Sderaadt #define isword(x) (isalnum((unsigned char)x) || (x) == '_')
170fe07e37bSderaadt 
171fe07e37bSderaadt static int
172f057df86Stedu procline(str_t *l, int nottext)
173fe07e37bSderaadt {
174fe07e37bSderaadt 	regmatch_t	pmatch;
175c85fc9dcSdhartmei 	int		c, i, r;
176f7ac433eSaschrijver 	regoff_t	offset;
177f7ac433eSaschrijver 
178f7ac433eSaschrijver 	/* size_t will be converted to regoff_t. ssize_t is guaranteed to fit
179f7ac433eSaschrijver 	 * into regoff_t */
180f7ac433eSaschrijver 	if (l->len > SSIZE_MAX) {
181f7ac433eSaschrijver 		errx(2, "Line is too big to process");
182f7ac433eSaschrijver 	}
183fe07e37bSderaadt 
18414774268Stedu 	c = 0;
18514774268Stedu 	i = 0;
186fe07e37bSderaadt 	if (matchall) {
1871b60208bStedu 		c = 1;
188fe07e37bSderaadt 		goto print;
189fe07e37bSderaadt 	}
190fe07e37bSderaadt 
19114774268Stedu 	for (i = 0; i < patterns; i++) {
19214774268Stedu 		offset = 0;
19314774268Stedu redo:
1940c9d1b5fSmillert 		if (fg_pattern[i].pattern) {
19514774268Stedu 			r = grep_search(&fg_pattern[i], l->dat + offset,
19614774268Stedu 			    l->len - offset, &pmatch);
19714774268Stedu 			pmatch.rm_so += offset;
19814774268Stedu 			pmatch.rm_eo += offset;
1990c9d1b5fSmillert 		} else {
20014774268Stedu 			pmatch.rm_so = offset;
201cbb0f937Sotto 			pmatch.rm_eo = l->len;
202674ee822Smillert 			r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags);
203fe07e37bSderaadt 		}
2040c9d1b5fSmillert 		if (r == 0 && xflag) {
205fe07e37bSderaadt 			if (pmatch.rm_so != 0 || pmatch.rm_eo != l->len)
206fe07e37bSderaadt 				r = REG_NOMATCH;
207fe07e37bSderaadt 		}
208c85fc9dcSdhartmei 		if (r == 0) {
20914774268Stedu 			c = 1;
21031a151d0Smillert 			if (oflag && pmatch.rm_so != pmatch.rm_eo)
21114774268Stedu 				goto print;
212fe07e37bSderaadt 			break;
213fe07e37bSderaadt 		}
214fe07e37bSderaadt 	}
21514774268Stedu 	if (oflag)
21614774268Stedu 		return c;
21714774268Stedu print:
218c85fc9dcSdhartmei 	if (vflag)
219c85fc9dcSdhartmei 		c = !c;
220fe07e37bSderaadt 
221f057df86Stedu 	if (c && binbehave == BIN_FILE_BIN && nottext)
222f057df86Stedu 		return c; /* Binary file */
223f057df86Stedu 
224fe07e37bSderaadt 	if ((tail > 0 || c) && !cflag && !qflag) {
225fe07e37bSderaadt 		if (c) {
226621edca7Sderaadt 			if (first > 0 && tail == 0 && (Bflag < linesqueued) &&
227621edca7Sderaadt 			    (Aflag || Bflag))
228fe07e37bSderaadt 				printf("--\n");
229fe07e37bSderaadt 			first = 1;
230fe07e37bSderaadt 			tail = Aflag;
231fe07e37bSderaadt 			if (Bflag > 0)
232fe07e37bSderaadt 				printqueue();
233fe07e37bSderaadt 			linesqueued = 0;
23414774268Stedu 			printline(l, ':', oflag ? &pmatch : NULL);
235fe07e37bSderaadt 		} else {
23614774268Stedu 			printline(l, '-', oflag ? &pmatch : NULL);
237fe07e37bSderaadt 			tail--;
238fe07e37bSderaadt 		}
239fe07e37bSderaadt 	}
24014774268Stedu 	if (oflag && !matchall) {
24114774268Stedu 		offset = pmatch.rm_eo;
24214774268Stedu 		goto redo;
24314774268Stedu 	}
244fe07e37bSderaadt 	return c;
245fe07e37bSderaadt }
246fe07e37bSderaadt 
247f56cb323Stedu #ifndef SMALL
2485c151985Sotto void
2496cd4fad2Sderaadt fgrepcomp(fastgrep_t *fg, const unsigned char *pattern)
250acf867c4Smillert {
251acf867c4Smillert 	int i;
252acf867c4Smillert 
253acf867c4Smillert 	/* Initialize. */
254acf867c4Smillert 	fg->patternLen = strlen(pattern);
255acf867c4Smillert 	fg->bol = 0;
256acf867c4Smillert 	fg->eol = 0;
257acf867c4Smillert 	fg->wmatch = wflag;
258acf867c4Smillert 	fg->reversedSearch = 0;
259acf867c4Smillert 
260acf867c4Smillert 	/*
261acf867c4Smillert 	 * Make a copy and upper case it for later if in -i mode,
262acf867c4Smillert 	 * else just copy the pointer.
263acf867c4Smillert 	 */
264acf867c4Smillert 	if (iflag) {
265acf867c4Smillert 		fg->pattern = grep_malloc(fg->patternLen + 1);
266acf867c4Smillert 		for (i = 0; i < fg->patternLen; i++)
267acf867c4Smillert 			fg->pattern[i] = toupper(pattern[i]);
268acf867c4Smillert 		fg->pattern[fg->patternLen] = '\0';
269acf867c4Smillert 	} else
270aeda29f3Sderaadt 		fg->pattern = (unsigned char *)pattern;	/* really const */
271acf867c4Smillert 
272acf867c4Smillert 	/* Preprocess pattern. */
273acf867c4Smillert 	for (i = 0; i <= UCHAR_MAX; i++)
274acf867c4Smillert 		fg->qsBc[i] = fg->patternLen;
275acf867c4Smillert 	for (i = 1; i < fg->patternLen; i++) {
276acf867c4Smillert 		fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
277acf867c4Smillert 		/*
278acf867c4Smillert 		 * If case is ignored, make the jump apply to both upper and
279acf867c4Smillert 		 * lower cased characters.  As the pattern is stored in upper
280acf867c4Smillert 		 * case, apply the same to the lower case equivalents.
281acf867c4Smillert 		 */
282acf867c4Smillert 		if (iflag)
283acf867c4Smillert 			fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
284acf867c4Smillert 	}
285acf867c4Smillert }
286f56cb323Stedu #endif
287acf867c4Smillert 
288acf867c4Smillert /*
289acf867c4Smillert  * Returns: -1 on failure, 0 on success
290acf867c4Smillert  */
291acf867c4Smillert int
29270c36c4eStedu fastcomp(fastgrep_t *fg, const char *pattern)
29370c36c4eStedu {
294f56cb323Stedu #ifdef SMALL
295f56cb323Stedu 	return -1;
296f56cb323Stedu #else
29770c36c4eStedu 	int i;
29870c36c4eStedu 	int bol = 0;
29970c36c4eStedu 	int eol = 0;
30070c36c4eStedu 	int shiftPatternLen;
30170c36c4eStedu 	int hasDot = 0;
30270c36c4eStedu 	int firstHalfDot = -1;
30370c36c4eStedu 	int firstLastHalfDot = -1;
30470c36c4eStedu 	int lastHalfDot = 0;
30570c36c4eStedu 
30670c36c4eStedu 	/* Initialize. */
307aeda29f3Sderaadt 	fg->patternLen = strlen(pattern);
30870c36c4eStedu 	fg->bol = 0;
30970c36c4eStedu 	fg->eol = 0;
3100c9d1b5fSmillert 	fg->wmatch = 0;
31170c36c4eStedu 	fg->reversedSearch = 0;
31270c36c4eStedu 
31370c36c4eStedu 	/* Remove end-of-line character ('$'). */
31451098fefSeric 	if (fg->patternLen > 0 && pattern[fg->patternLen - 1] == '$') {
31570c36c4eStedu 		eol++;
31670c36c4eStedu 		fg->eol = 1;
31770c36c4eStedu 		fg->patternLen--;
31870c36c4eStedu 	}
31970c36c4eStedu 
32070c36c4eStedu 	/* Remove beginning-of-line character ('^'). */
32170c36c4eStedu 	if (pattern[0] == '^') {
32270c36c4eStedu 		bol++;
32370c36c4eStedu 		fg->bol = 1;
32470c36c4eStedu 		fg->patternLen--;
32570c36c4eStedu 	}
32670c36c4eStedu 
3270c9d1b5fSmillert 	/* Remove enclosing [[:<:]] and [[:>:]] (word match). */
32830c8a262Sotto 	if (wflag) {
32930c8a262Sotto 		/* basic re's use \( \), extended re's ( ) */
33030c8a262Sotto 		int extra = Eflag ? 1 : 2;
33130c8a262Sotto 		fg->patternLen -= 14 + 2 * extra;
33230c8a262Sotto 		fg->wmatch = 7 + extra;
33330c8a262Sotto 	} else if (fg->patternLen >= 14 &&
3340c9d1b5fSmillert 	    strncmp(pattern + fg->bol, "[[:<:]]", 7) == 0 &&
335394a0db6Smillert 	    strncmp(pattern + fg->bol + fg->patternLen - 7, "[[:>:]]", 7) == 0) {
3360c9d1b5fSmillert 		fg->patternLen -= 14;
3370c9d1b5fSmillert 		fg->wmatch = 7;
3380c9d1b5fSmillert 	}
3390c9d1b5fSmillert 
34070c36c4eStedu 	/*
3410c9d1b5fSmillert 	 * Copy pattern minus '^' and '$' characters as well as word
3420c9d1b5fSmillert 	 * match character classes at the beginning and ending of the
3430c9d1b5fSmillert 	 * string respectively.
34470c36c4eStedu 	 */
3450c9d1b5fSmillert 	fg->pattern = grep_malloc(fg->patternLen + 1);
3460c9d1b5fSmillert 	memcpy(fg->pattern, pattern + bol + fg->wmatch, fg->patternLen);
3470c9d1b5fSmillert 	fg->pattern[fg->patternLen] = '\0';
34870c36c4eStedu 
34970c36c4eStedu 	/* Look for ways to cheat...er...avoid the full regex engine. */
35070c36c4eStedu 	for (i = 0; i < fg->patternLen; i++)
35170c36c4eStedu 	{
352bcaf0f1eStedu 		switch (fg->pattern[i]) {
353bcaf0f1eStedu 		case '.':
35470c36c4eStedu 			hasDot = i;
35570c36c4eStedu 			if (i < fg->patternLen / 2) {
35603c8a690Sotto 				if (firstHalfDot < 0)
35770c36c4eStedu 					/* Closest dot to the beginning */
35870c36c4eStedu 					firstHalfDot = i;
35970c36c4eStedu 			} else {
36070c36c4eStedu 				/* Closest dot to the end of the pattern. */
36170c36c4eStedu 				lastHalfDot = i;
36270c36c4eStedu 				if (firstLastHalfDot < 0)
36370c36c4eStedu 					firstLastHalfDot = i;
36470c36c4eStedu 			}
365bcaf0f1eStedu 			break;
366bcaf0f1eStedu 		case '(': case ')':
367bcaf0f1eStedu 		case '{': case '}':
368bcaf0f1eStedu 			/* Special in BRE if preceded by '\\' */
369bcaf0f1eStedu 		case '?':
370bcaf0f1eStedu 		case '+':
371bcaf0f1eStedu 		case '|':
372bcaf0f1eStedu 			/* Not special in BRE. */
373bcaf0f1eStedu 			if (!Eflag)
374bcaf0f1eStedu 				goto nonspecial;
375bcaf0f1eStedu 		case '\\':
376bcaf0f1eStedu 		case '*':
377bcaf0f1eStedu 		case '[': case ']':
37870c36c4eStedu 			/* Free memory and let others know this is empty. */
37970c36c4eStedu 			free(fg->pattern);
38070c36c4eStedu 			fg->pattern = NULL;
38170c36c4eStedu 			return (-1);
382bcaf0f1eStedu 		default:
383bcaf0f1eStedu nonspecial:
384bcaf0f1eStedu 			if (iflag)
385bcaf0f1eStedu 				fg->pattern[i] = toupper(fg->pattern[i]);
386bcaf0f1eStedu 			break;
38770c36c4eStedu 		}
38870c36c4eStedu 	}
38970c36c4eStedu 
39070c36c4eStedu 	/*
39170c36c4eStedu 	 * Determine if a reverse search would be faster based on the placement
39270c36c4eStedu 	 * of the dots.
39370c36c4eStedu 	 */
394*19f8bc08Sotto 	if ((!(lflag || cflag || oflag)) && ((!(bol || eol)) &&
39570c36c4eStedu 	    ((lastHalfDot) && ((firstHalfDot < 0) ||
39670c36c4eStedu 	    ((fg->patternLen - (lastHalfDot + 1)) < firstHalfDot))))) {
39770c36c4eStedu 		fg->reversedSearch = 1;
39870c36c4eStedu 		hasDot = fg->patternLen - (firstHalfDot < 0 ?
39970c36c4eStedu 		    firstLastHalfDot : firstHalfDot) - 1;
40070c36c4eStedu 		grep_revstr(fg->pattern, fg->patternLen);
40170c36c4eStedu 	}
40270c36c4eStedu 
40370c36c4eStedu 	/*
40470c36c4eStedu 	 * Normal Quick Search would require a shift based on the position the
40570c36c4eStedu 	 * next character after the comparison is within the pattern.  With
40670c36c4eStedu 	 * wildcards, the position of the last dot effects the maximum shift
40770c36c4eStedu 	 * distance.
40870c36c4eStedu 	 * The closer to the end the wild card is the slower the search.  A
40970c36c4eStedu 	 * reverse version of this algorithm would be useful for wildcards near
41070c36c4eStedu 	 * the end of the string.
41170c36c4eStedu 	 *
41270c36c4eStedu 	 * Examples:
41370c36c4eStedu 	 * Pattern	Max shift
41470c36c4eStedu 	 * -------	---------
41570c36c4eStedu 	 * this		5
41670c36c4eStedu 	 * .his		4
41770c36c4eStedu 	 * t.is		3
41870c36c4eStedu 	 * th.s		2
41970c36c4eStedu 	 * thi.		1
42070c36c4eStedu 	 */
42170c36c4eStedu 
42270c36c4eStedu 	/* Adjust the shift based on location of the last dot ('.'). */
42370c36c4eStedu 	shiftPatternLen = fg->patternLen - hasDot;
42470c36c4eStedu 
42570c36c4eStedu 	/* Preprocess pattern. */
42670c36c4eStedu 	for (i = 0; i <= UCHAR_MAX; i++)
42770c36c4eStedu 		fg->qsBc[i] = shiftPatternLen;
42870c36c4eStedu 	for (i = hasDot + 1; i < fg->patternLen; i++) {
42970c36c4eStedu 		fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
43070c36c4eStedu 		/*
43170c36c4eStedu 		 * If case is ignored, make the jump apply to both upper and
43270c36c4eStedu 		 * lower cased characters.  As the pattern is stored in upper
43370c36c4eStedu 		 * case, apply the same to the lower case equivalents.
43470c36c4eStedu 		 */
43570c36c4eStedu 		if (iflag)
43670c36c4eStedu 			fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
43770c36c4eStedu 	}
43870c36c4eStedu 
43970c36c4eStedu 	/*
44070c36c4eStedu 	 * Put pattern back to normal after pre-processing to allow for easy
44170c36c4eStedu 	 * comparisons later.
44270c36c4eStedu 	 */
44370c36c4eStedu 	if (fg->reversedSearch)
44470c36c4eStedu 		grep_revstr(fg->pattern, fg->patternLen);
44570c36c4eStedu 
44670c36c4eStedu 	return (0);
447f56cb323Stedu #endif
44870c36c4eStedu }
44970c36c4eStedu 
450809cad80Sotto /*
451809cad80Sotto  * Word boundaries using regular expressions are defined as the point
452809cad80Sotto  * of transition from a non-word char to a word char, or vice versa.
453809cad80Sotto  * This means that grep -w +a and grep -w a+ never match anything,
454809cad80Sotto  * because they lack a starting or ending transition, but grep -w a+b
455809cad80Sotto  * does match a line containing a+b.
456809cad80Sotto  */
4570c9d1b5fSmillert #define wmatch(d, l, s, e)	\
458809cad80Sotto 	((s == 0 || !isword(d[s-1])) && (e == l || !isword(d[e])) && \
459809cad80Sotto 	  e > s && isword(d[s]) && isword(d[e-1]))
4600c9d1b5fSmillert 
461674ee822Smillert static int
46214774268Stedu grep_search(fastgrep_t *fg, char *data, size_t dataLen, regmatch_t *pmatch)
46370c36c4eStedu {
464f56cb323Stedu #ifdef SMALL
465f56cb323Stedu 	return 0;
466f56cb323Stedu #else
467f7ac433eSaschrijver 	regoff_t j;
46870c36c4eStedu 	int rtrnVal = REG_NOMATCH;
46970c36c4eStedu 
470674ee822Smillert 	pmatch->rm_so = -1;
471674ee822Smillert 	pmatch->rm_eo = -1;
472674ee822Smillert 
47370c36c4eStedu 	/* No point in going farther if we do not have enough data. */
47470c36c4eStedu 	if (dataLen < fg->patternLen)
47570c36c4eStedu 		return (rtrnVal);
47670c36c4eStedu 
47770c36c4eStedu 	/* Only try once at the beginning or ending of the line. */
47870c36c4eStedu 	if (fg->bol || fg->eol) {
47970c36c4eStedu 		/* Simple text comparison. */
48070c36c4eStedu 		/* Verify data is >= pattern length before searching on it. */
48170c36c4eStedu 		if (dataLen >= fg->patternLen) {
48270c36c4eStedu 			/* Determine where in data to start search at. */
48370c36c4eStedu 			if (fg->eol)
48470c36c4eStedu 				j = dataLen - fg->patternLen;
48570c36c4eStedu 			else
48670c36c4eStedu 				j = 0;
48770c36c4eStedu 			if (!((fg->bol && fg->eol) && (dataLen != fg->patternLen)))
4880c9d1b5fSmillert 				if (grep_cmp(fg->pattern, data + j,
48957c37fdaSmillert 				    fg->patternLen)) {
490674ee822Smillert 					pmatch->rm_so = j;
491674ee822Smillert 					pmatch->rm_eo = j + fg->patternLen;
4920c9d1b5fSmillert 					if (!fg->wmatch || wmatch(data, dataLen,
4930c9d1b5fSmillert 					    pmatch->rm_so, pmatch->rm_eo))
4940c9d1b5fSmillert 						rtrnVal = 0;
495674ee822Smillert 				}
49670c36c4eStedu 		}
49770c36c4eStedu 	} else if (fg->reversedSearch) {
49870c36c4eStedu 		/* Quick Search algorithm. */
49988410190Smillert 		j = dataLen;
50088410190Smillert 		do {
50170c36c4eStedu 			if (grep_cmp(fg->pattern, data + j - fg->patternLen,
50257c37fdaSmillert 			    fg->patternLen)) {
503674ee822Smillert 				pmatch->rm_so = j - fg->patternLen;
504674ee822Smillert 				pmatch->rm_eo = j;
5050c9d1b5fSmillert 				if (!fg->wmatch || wmatch(data, dataLen,
5060c9d1b5fSmillert 				    pmatch->rm_so, pmatch->rm_eo)) {
5070c9d1b5fSmillert 					rtrnVal = 0;
50870c36c4eStedu 					break;
50970c36c4eStedu 				}
5100c9d1b5fSmillert 			}
51188410190Smillert 			/* Shift if within bounds, otherwise, we are done. */
51288410190Smillert 			if (j == fg->patternLen)
51388410190Smillert 				break;
51414774268Stedu 			j -= fg->qsBc[(unsigned char)data[j - fg->patternLen - 1]];
51588410190Smillert 		} while (j >= fg->patternLen);
51670c36c4eStedu 	} else {
51770c36c4eStedu 		/* Quick Search algorithm. */
51870c36c4eStedu 		j = 0;
51970c36c4eStedu 		do {
52057c37fdaSmillert 			if (grep_cmp(fg->pattern, data + j, fg->patternLen)) {
521674ee822Smillert 				pmatch->rm_so = j;
522674ee822Smillert 				pmatch->rm_eo = j + fg->patternLen;
5233d12d7eeSjaredy 				if (fg->patternLen == 0 || !fg->wmatch ||
5243d12d7eeSjaredy 				    wmatch(data, dataLen, pmatch->rm_so,
5253d12d7eeSjaredy 				    pmatch->rm_eo)) {
5260c9d1b5fSmillert 					rtrnVal = 0;
52770c36c4eStedu 					break;
52870c36c4eStedu 				}
5290c9d1b5fSmillert 			}
53070c36c4eStedu 
53170c36c4eStedu 			/* Shift if within bounds, otherwise, we are done. */
53270c36c4eStedu 			if (j + fg->patternLen == dataLen)
53370c36c4eStedu 				break;
53470c36c4eStedu 			else
53514774268Stedu 				j += fg->qsBc[(unsigned char)data[j + fg->patternLen]];
53670c36c4eStedu 		} while (j <= (dataLen - fg->patternLen));
53770c36c4eStedu 	}
53870c36c4eStedu 
53970c36c4eStedu 	return (rtrnVal);
540f56cb323Stedu #endif
54170c36c4eStedu }
54270c36c4eStedu 
54370c36c4eStedu 
544fe07e37bSderaadt void *
545fe07e37bSderaadt grep_malloc(size_t size)
546fe07e37bSderaadt {
547fe07e37bSderaadt 	void	*ptr;
548fe07e37bSderaadt 
549fe07e37bSderaadt 	if ((ptr = malloc(size)) == NULL)
550f91b5226Smillert 		err(2, "malloc");
551fe07e37bSderaadt 	return ptr;
552fe07e37bSderaadt }
553fe07e37bSderaadt 
554fe07e37bSderaadt void *
5551ed98fdfSderaadt grep_calloc(size_t nmemb, size_t size)
5561ed98fdfSderaadt {
5571ed98fdfSderaadt 	void	*ptr;
5581ed98fdfSderaadt 
5591ed98fdfSderaadt 	if ((ptr = calloc(nmemb, size)) == NULL)
5601ed98fdfSderaadt 		err(2, "calloc");
5611ed98fdfSderaadt 	return ptr;
5621ed98fdfSderaadt }
5631ed98fdfSderaadt 
5641ed98fdfSderaadt void *
565fe07e37bSderaadt grep_realloc(void *ptr, size_t size)
566fe07e37bSderaadt {
567fe07e37bSderaadt 	if ((ptr = realloc(ptr, size)) == NULL)
568f91b5226Smillert 		err(2, "realloc");
569fe07e37bSderaadt 	return ptr;
570fe07e37bSderaadt }
571fe07e37bSderaadt 
57262ab4a5fSderaadt void *
57362ab4a5fSderaadt grep_reallocarray(void *ptr, size_t nmemb, size_t size)
57462ab4a5fSderaadt {
57562ab4a5fSderaadt 	if ((ptr = reallocarray(ptr, nmemb, size)) == NULL)
57662ab4a5fSderaadt 		err(2, "reallocarray");
57762ab4a5fSderaadt 	return ptr;
57862ab4a5fSderaadt }
57962ab4a5fSderaadt 
580f56cb323Stedu #ifndef SMALL
58170c36c4eStedu /*
58257c37fdaSmillert  * Returns:	true on success, false on failure
58370c36c4eStedu  */
58457c37fdaSmillert static bool
58514774268Stedu grep_cmp(const char *pattern, const char *data, size_t len)
58670c36c4eStedu {
58757c37fdaSmillert 	size_t i;
58870c36c4eStedu 
58970c36c4eStedu 	for (i = 0; i < len; i++) {
590acf867c4Smillert 		if (((pattern[i] == data[i]) || (!Fflag && pattern[i] == '.'))
5910340d4f1Smmcc 		    || (iflag && pattern[i] == toupper((unsigned char)data[i])))
59270c36c4eStedu 			continue;
59357c37fdaSmillert 		return false;
59470c36c4eStedu 	}
59570c36c4eStedu 
59657c37fdaSmillert 	return true;
59770c36c4eStedu }
59870c36c4eStedu 
59970c36c4eStedu static void
60070c36c4eStedu grep_revstr(unsigned char *str, int len)
60170c36c4eStedu {
60270c36c4eStedu 	int i;
60370c36c4eStedu 	char c;
60470c36c4eStedu 
60570c36c4eStedu 	for (i = 0; i < len / 2; i++) {
60670c36c4eStedu 		c = str[i];
60770c36c4eStedu 		str[i] = str[len - i - 1];
60870c36c4eStedu 		str[len - i - 1] = c;
60970c36c4eStedu 	}
61070c36c4eStedu }
611f56cb323Stedu #endif
61270c36c4eStedu 
613fe07e37bSderaadt void
61414774268Stedu printline(str_t *line, int sep, regmatch_t *pmatch)
615fe07e37bSderaadt {
616fe07e37bSderaadt 	int n;
617fe07e37bSderaadt 
618fe07e37bSderaadt 	n = 0;
619fe07e37bSderaadt 	if (!hflag) {
620fe07e37bSderaadt 		fputs(line->file, stdout);
621fe07e37bSderaadt 		++n;
622fe07e37bSderaadt 	}
623fe07e37bSderaadt 	if (nflag) {
624fe07e37bSderaadt 		if (n)
625fe07e37bSderaadt 			putchar(sep);
6262f35e644Smmcc 		printf("%lld", line->line_no);
627fe07e37bSderaadt 		++n;
628fe07e37bSderaadt 	}
629fe07e37bSderaadt 	if (bflag) {
630fe07e37bSderaadt 		if (n)
631fe07e37bSderaadt 			putchar(sep);
632b02c2e6bSotto 		printf("%lld", (long long)line->off);
633b02c2e6bSotto 		++n;
634fe07e37bSderaadt 	}
635fe07e37bSderaadt 	if (n)
636fe07e37bSderaadt 		putchar(sep);
63714774268Stedu 	if (pmatch)
63814774268Stedu 		fwrite(line->dat + pmatch->rm_so,
63914774268Stedu 		    pmatch->rm_eo - pmatch->rm_so, 1, stdout);
64014774268Stedu 	else
641fe07e37bSderaadt 		fwrite(line->dat, line->len, 1, stdout);
642fe07e37bSderaadt 	putchar('\n');
643fe07e37bSderaadt }
644