xref: /openbsd-src/usr.bin/grep/util.c (revision 03848a0fe6220d28d12d9675a1a2ec69ec26e367)
1*03848a0fSmillert /*	$OpenBSD: util.c,v 1.68 2023/11/15 00:50:43 millert Exp $	*/
2fd6e2b5bSderaadt 
3fe07e37bSderaadt /*-
4fe07e37bSderaadt  * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav
5fe07e37bSderaadt  * All rights reserved.
6fe07e37bSderaadt  *
7fe07e37bSderaadt  * Redistribution and use in source and binary forms, with or without
8fe07e37bSderaadt  * modification, are permitted provided that the following conditions
9fe07e37bSderaadt  * are met:
10fe07e37bSderaadt  * 1. Redistributions of source code must retain the above copyright
11fe07e37bSderaadt  *    notice, this list of conditions and the following disclaimer.
12fe07e37bSderaadt  * 2. Redistributions in binary form must reproduce the above copyright
13fe07e37bSderaadt  *    notice, this list of conditions and the following disclaimer in the
14fe07e37bSderaadt  *    documentation and/or other materials provided with the distribution.
15fe07e37bSderaadt  *
16fe07e37bSderaadt  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17fe07e37bSderaadt  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18fe07e37bSderaadt  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19fe07e37bSderaadt  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20fe07e37bSderaadt  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21fe07e37bSderaadt  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22fe07e37bSderaadt  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23fe07e37bSderaadt  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24fe07e37bSderaadt  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25fe07e37bSderaadt  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26fe07e37bSderaadt  * SUCH DAMAGE.
27fe07e37bSderaadt  */
28fe07e37bSderaadt 
29fe07e37bSderaadt #include <sys/types.h>
30fe07e37bSderaadt #include <sys/stat.h>
31fe07e37bSderaadt 
32fe07e37bSderaadt #include <ctype.h>
33fe07e37bSderaadt #include <err.h>
34fe07e37bSderaadt #include <errno.h>
35fe07e37bSderaadt #include <fts.h>
36fe07e37bSderaadt #include <regex.h>
3757c37fdaSmillert #include <stdbool.h>
38fe07e37bSderaadt #include <stdio.h>
39fe07e37bSderaadt #include <stdlib.h>
40fe07e37bSderaadt #include <string.h>
41fe07e37bSderaadt #include <unistd.h>
42fe07e37bSderaadt #include <zlib.h>
43fe07e37bSderaadt 
44fe07e37bSderaadt #include "grep.h"
45fe07e37bSderaadt 
46fe07e37bSderaadt /*
47fe07e37bSderaadt  * Process a file line by line...
48fe07e37bSderaadt  */
49fe07e37bSderaadt 
50fe07e37bSderaadt static int	linesqueued;
51f057df86Stedu static int	procline(str_t *l, int);
52475af40dStedu static int	grep_search(fastgrep_t *, char *, size_t, regmatch_t *pmatch, int);
53f56cb323Stedu #ifndef SMALL
5457c37fdaSmillert static bool	grep_cmp(const char *, const char *, size_t);
5570c36c4eStedu static void	grep_revstr(unsigned char *, int);
56f56cb323Stedu #endif
57fe07e37bSderaadt 
58fe07e37bSderaadt int
grep_tree(char ** argv)59fe07e37bSderaadt grep_tree(char **argv)
60fe07e37bSderaadt {
61fe07e37bSderaadt 	FTS	*fts;
62fe07e37bSderaadt 	FTSENT	*p;
63fe07e37bSderaadt 	int	c, fts_flags;
64e8a72117Sjca 	char	*dot_argv[] = { ".", NULL };
65e8a72117Sjca 	char	*path;
66e8a72117Sjca 
67e8a72117Sjca 	/* default to . if no path given */
68e8a72117Sjca 	if (argv[0] == NULL)
69e8a72117Sjca 		argv = dot_argv;
70fe07e37bSderaadt 
7140155c30Stedu 	c = 0;
72fe07e37bSderaadt 
7340155c30Stedu 	fts_flags = FTS_PHYSICAL | FTS_NOSTAT | FTS_NOCHDIR;
74fe07e37bSderaadt 
750536143aSmillert 	if (!(fts = fts_open(argv, fts_flags, NULL)))
76f91b5226Smillert 		err(2, NULL);
77fe07e37bSderaadt 	while ((p = fts_read(fts)) != NULL) {
78fe07e37bSderaadt 		switch (p->fts_info) {
79fe07e37bSderaadt 		case FTS_DNR:
80fe07e37bSderaadt 			break;
81fe07e37bSderaadt 		case FTS_ERR:
8220351096Smillert 			file_err = 1;
8320351096Smillert 			if(!sflag)
845ad04d35Sguenther 				warnc(p->fts_errno, "%s", p->fts_path);
85fe07e37bSderaadt 			break;
863a21e479Stedu 		case FTS_D:
87fe07e37bSderaadt 		case FTS_DP:
88fe07e37bSderaadt 			break;
89fe07e37bSderaadt 		default:
90e8a72117Sjca 			path = p->fts_path;
91e8a72117Sjca 			/* skip "./" if implied */
92e8a72117Sjca 			if (argv == dot_argv && p->fts_pathlen >= 2)
93e8a72117Sjca 				path += 2;
94483368fbSmartijn 			c |= procfile(path);
95fe07e37bSderaadt 			break;
96fe07e37bSderaadt 		}
97fe07e37bSderaadt 	}
984760cb14Sotto 	if (errno)
994760cb14Sotto 		err(2, "fts_read");
100f12e5d26Suebayasi 	fts_close(fts);
101fe07e37bSderaadt 	return c;
102fe07e37bSderaadt }
103fe07e37bSderaadt 
104fe07e37bSderaadt int
procfile(char * fn)105fe07e37bSderaadt procfile(char *fn)
106fe07e37bSderaadt {
107fe07e37bSderaadt 	str_t ln;
108fe07e37bSderaadt 	file_t *f;
109483368fbSmartijn 	int t, z, nottext, overflow = 0;
110483368fbSmartijn 	unsigned long long c;
111fe07e37bSderaadt 
112b5430e4dSpirofti 	mcount = mlimit;
113b5430e4dSpirofti 
114fe07e37bSderaadt 	if (fn == NULL) {
115fe07e37bSderaadt 		fn = "(standard input)";
1163a21e479Stedu 		f = grep_fdopen(STDIN_FILENO);
117fe07e37bSderaadt 	} else {
1183a21e479Stedu 		f = grep_open(fn);
119fe07e37bSderaadt 	}
120fe07e37bSderaadt 	if (f == NULL) {
1213a21e479Stedu 		if (errno == EISDIR)
1223a21e479Stedu 			return 0;
12320351096Smillert 		file_err = 1;
124fe07e37bSderaadt 		if (!sflag)
125fe07e37bSderaadt 			warn("%s", fn);
126fe07e37bSderaadt 		return 0;
127fe07e37bSderaadt 	}
128f057df86Stedu 
129f057df86Stedu 	nottext = grep_bin_file(f);
130f057df86Stedu 	if (nottext && binbehave == BIN_FILE_SKIP) {
131fe07e37bSderaadt 		grep_close(f);
132fe07e37bSderaadt 		return 0;
133fe07e37bSderaadt 	}
134fe07e37bSderaadt 
135fe07e37bSderaadt 	ln.file = fn;
136e06eeb70Stedu 	if (labelname)
137e06eeb70Stedu 		ln.file = (char *)labelname;
138fe07e37bSderaadt 	ln.line_no = 0;
1391d5054b0Sespie 	ln.len = 0;
140fe07e37bSderaadt 	linesqueued = 0;
1412eb140a3Sjaredy 	tail = 0;
142fe07e37bSderaadt 	ln.off = -1;
143fe07e37bSderaadt 
144fe07e37bSderaadt 	if (Bflag > 0)
145fe07e37bSderaadt 		initqueue();
14699210d50Sotto 	for (c = 0;  c == 0 || !(lflag || qflag); ) {
147b5430e4dSpirofti 		if (mflag && mlimit == 0)
148b5430e4dSpirofti 			break;
149fe07e37bSderaadt 		ln.off += ln.len + 1;
150fe07e37bSderaadt 		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL)
151fe07e37bSderaadt 			break;
152fe07e37bSderaadt 		if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
153fe07e37bSderaadt 			--ln.len;
154fe07e37bSderaadt 		ln.line_no++;
155fe07e37bSderaadt 
156fe07e37bSderaadt 		z = tail;
157fe07e37bSderaadt 
158f057df86Stedu 		if ((t = procline(&ln, nottext)) == 0 && Bflag > 0 && z == 0) {
159fe07e37bSderaadt 			enqueue(&ln);
160fe07e37bSderaadt 			linesqueued++;
161fe07e37bSderaadt 		}
162483368fbSmartijn 		if (ULLONG_MAX - c < (unsigned long long)t)
163483368fbSmartijn 			overflow = 1;
164483368fbSmartijn 		else
165fe07e37bSderaadt 			c += t;
166eda4449cSdv 		if (mflag && mcount <= 0 && tail <= 0)
167b5430e4dSpirofti 			break;
168fe07e37bSderaadt 	}
169fe07e37bSderaadt 	if (Bflag > 0)
170fe07e37bSderaadt 		clearqueue();
171fe07e37bSderaadt 	grep_close(f);
172fe07e37bSderaadt 
173fe07e37bSderaadt 	if (cflag) {
174fe07e37bSderaadt 		if (!hflag)
1751fd6e0f2Sop 			printf("%s%c", ln.file, nullflag ? '\0' : ':');
176483368fbSmartijn 		printf("%llu%s\n", c, overflow ? "+" : "");
177fe07e37bSderaadt 	}
178fe07e37bSderaadt 	if (lflag && c != 0)
1791fd6e0f2Sop 		printf("%s%c", fn, nullflag ? '\0' : '\n');
180fe07e37bSderaadt 	if (Lflag && c == 0)
1811fd6e0f2Sop 		printf("%s%c", fn, nullflag ? '\0' : '\n');
182f057df86Stedu 	if (c && !cflag && !lflag && !Lflag &&
1830c0c855bStedu 	    binbehave == BIN_FILE_BIN && nottext && !qflag)
184f057df86Stedu 		printf("Binary file %s matches\n", fn);
185f057df86Stedu 
186483368fbSmartijn 	return overflow || c != 0;
187fe07e37bSderaadt }
188fe07e37bSderaadt 
189fe07e37bSderaadt 
190fe07e37bSderaadt /*
191fe07e37bSderaadt  * Process an individual line in a file. Return non-zero if it matches.
192fe07e37bSderaadt  */
193fe07e37bSderaadt 
1946cd4fad2Sderaadt #define isword(x) (isalnum((unsigned char)x) || (x) == '_')
195fe07e37bSderaadt 
196fe07e37bSderaadt static int
procline(str_t * l,int nottext)197f057df86Stedu procline(str_t *l, int nottext)
198fe07e37bSderaadt {
19925f2b9c3Stedu 	regmatch_t	pmatch = { 0 };
200*03848a0fSmillert 	int		c, i, r, counted;
201f7ac433eSaschrijver 	regoff_t	offset;
202f7ac433eSaschrijver 
203f7ac433eSaschrijver 	/* size_t will be converted to regoff_t. ssize_t is guaranteed to fit
204f7ac433eSaschrijver 	 * into regoff_t */
205f7ac433eSaschrijver 	if (l->len > SSIZE_MAX) {
206f7ac433eSaschrijver 		errx(2, "Line is too big to process");
207f7ac433eSaschrijver 	}
208fe07e37bSderaadt 
20914774268Stedu 	c = 0;
21014774268Stedu 	i = 0;
211*03848a0fSmillert 	counted = 0;
212fe07e37bSderaadt 	if (matchall) {
2131b60208bStedu 		c = 1;
214fe07e37bSderaadt 		goto print;
215fe07e37bSderaadt 	}
216eda4449cSdv 	if (mflag && mcount <= 0)
217eda4449cSdv 		goto print;
218fe07e37bSderaadt 
21914774268Stedu 	for (i = 0; i < patterns; i++) {
22014774268Stedu 		offset = 0;
22114774268Stedu redo:
2220c9d1b5fSmillert 		if (fg_pattern[i].pattern) {
223475af40dStedu 			int flags = 0;
224475af40dStedu 			if (offset)
225475af40dStedu 				flags |= REG_NOTBOL;
22614774268Stedu 			r = grep_search(&fg_pattern[i], l->dat + offset,
227475af40dStedu 			    l->len - offset, &pmatch, flags);
22814774268Stedu 			pmatch.rm_so += offset;
22914774268Stedu 			pmatch.rm_eo += offset;
2300c9d1b5fSmillert 		} else {
231475af40dStedu 			int flags = eflags;
232475af40dStedu 			if (offset)
233475af40dStedu 				flags |= REG_NOTBOL;
23414774268Stedu 			pmatch.rm_so = offset;
235cbb0f937Sotto 			pmatch.rm_eo = l->len;
236475af40dStedu 			r = regexec(&r_pattern[i], l->dat, 1, &pmatch, flags);
237fe07e37bSderaadt 		}
2380c9d1b5fSmillert 		if (r == 0 && xflag) {
239fe07e37bSderaadt 			if (pmatch.rm_so != 0 || pmatch.rm_eo != l->len)
240fe07e37bSderaadt 				r = REG_NOMATCH;
241fe07e37bSderaadt 		}
242c85fc9dcSdhartmei 		if (r == 0) {
24314774268Stedu 			c = 1;
24431a151d0Smillert 			if (oflag && pmatch.rm_so != pmatch.rm_eo)
24514774268Stedu 				goto print;
246fe07e37bSderaadt 			break;
247fe07e37bSderaadt 		}
248fe07e37bSderaadt 	}
24914774268Stedu 	if (oflag)
25014774268Stedu 		return c;
25114774268Stedu print:
252c85fc9dcSdhartmei 	if (vflag)
253c85fc9dcSdhartmei 		c = !c;
254fe07e37bSderaadt 
255*03848a0fSmillert 	/* Count the matches if there is a match limit (but only once). */
256*03848a0fSmillert 	if (mflag && !counted) {
257b5430e4dSpirofti 		mcount -= c;
258*03848a0fSmillert 		counted = 1;
259*03848a0fSmillert 	}
260b5430e4dSpirofti 
261f057df86Stedu 	if (c && binbehave == BIN_FILE_BIN && nottext)
262f057df86Stedu 		return c; /* Binary file */
263f057df86Stedu 
264fe07e37bSderaadt 	if ((tail > 0 || c) && !cflag && !qflag) {
265fe07e37bSderaadt 		if (c) {
2662774c493Sotto 			if (first > 0 && tail == 0 && (Aflag || (Bflag &&
2672774c493Sotto 			    Bflag < linesqueued)))
268fe07e37bSderaadt 				printf("--\n");
269fe07e37bSderaadt 			first = 1;
270fe07e37bSderaadt 			tail = Aflag;
271fe07e37bSderaadt 			if (Bflag > 0)
272fe07e37bSderaadt 				printqueue();
273fe07e37bSderaadt 			linesqueued = 0;
27414774268Stedu 			printline(l, ':', oflag ? &pmatch : NULL);
275fe07e37bSderaadt 		} else {
27614774268Stedu 			printline(l, '-', oflag ? &pmatch : NULL);
277fe07e37bSderaadt 			tail--;
278fe07e37bSderaadt 		}
279fe07e37bSderaadt 	}
28014774268Stedu 	if (oflag && !matchall) {
28114774268Stedu 		offset = pmatch.rm_eo;
28214774268Stedu 		goto redo;
28314774268Stedu 	}
284fe07e37bSderaadt 	return c;
285fe07e37bSderaadt }
286fe07e37bSderaadt 
287f56cb323Stedu #ifndef SMALL
2885c151985Sotto void
fgrepcomp(fastgrep_t * fg,const unsigned char * pattern)2896cd4fad2Sderaadt fgrepcomp(fastgrep_t *fg, const unsigned char *pattern)
290acf867c4Smillert {
291acf867c4Smillert 	int i;
292acf867c4Smillert 
293acf867c4Smillert 	/* Initialize. */
294acf867c4Smillert 	fg->patternLen = strlen(pattern);
295acf867c4Smillert 	fg->bol = 0;
296acf867c4Smillert 	fg->eol = 0;
297acf867c4Smillert 	fg->wmatch = wflag;
298acf867c4Smillert 	fg->reversedSearch = 0;
299acf867c4Smillert 
300acf867c4Smillert 	/*
301acf867c4Smillert 	 * Make a copy and upper case it for later if in -i mode,
302acf867c4Smillert 	 * else just copy the pointer.
303acf867c4Smillert 	 */
304acf867c4Smillert 	if (iflag) {
305acf867c4Smillert 		fg->pattern = grep_malloc(fg->patternLen + 1);
306acf867c4Smillert 		for (i = 0; i < fg->patternLen; i++)
307acf867c4Smillert 			fg->pattern[i] = toupper(pattern[i]);
308acf867c4Smillert 		fg->pattern[fg->patternLen] = '\0';
309acf867c4Smillert 	} else
310aeda29f3Sderaadt 		fg->pattern = (unsigned char *)pattern;	/* really const */
311acf867c4Smillert 
312acf867c4Smillert 	/* Preprocess pattern. */
313acf867c4Smillert 	for (i = 0; i <= UCHAR_MAX; i++)
314acf867c4Smillert 		fg->qsBc[i] = fg->patternLen;
315acf867c4Smillert 	for (i = 1; i < fg->patternLen; i++) {
316acf867c4Smillert 		fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
317acf867c4Smillert 		/*
318acf867c4Smillert 		 * If case is ignored, make the jump apply to both upper and
319acf867c4Smillert 		 * lower cased characters.  As the pattern is stored in upper
320acf867c4Smillert 		 * case, apply the same to the lower case equivalents.
321acf867c4Smillert 		 */
322acf867c4Smillert 		if (iflag)
323acf867c4Smillert 			fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
324acf867c4Smillert 	}
325acf867c4Smillert }
326f56cb323Stedu #endif
327acf867c4Smillert 
328acf867c4Smillert /*
329acf867c4Smillert  * Returns: -1 on failure, 0 on success
330acf867c4Smillert  */
331acf867c4Smillert int
fastcomp(fastgrep_t * fg,const char * pattern)33270c36c4eStedu fastcomp(fastgrep_t *fg, const char *pattern)
33370c36c4eStedu {
334f56cb323Stedu #ifdef SMALL
335f56cb323Stedu 	return -1;
336f56cb323Stedu #else
33770c36c4eStedu 	int i;
33870c36c4eStedu 	int bol = 0;
33970c36c4eStedu 	int eol = 0;
34070c36c4eStedu 	int shiftPatternLen;
34170c36c4eStedu 	int hasDot = 0;
34270c36c4eStedu 	int firstHalfDot = -1;
34370c36c4eStedu 	int firstLastHalfDot = -1;
34470c36c4eStedu 	int lastHalfDot = 0;
34570c36c4eStedu 
34670c36c4eStedu 	/* Initialize. */
347aeda29f3Sderaadt 	fg->patternLen = strlen(pattern);
34870c36c4eStedu 	fg->bol = 0;
34970c36c4eStedu 	fg->eol = 0;
3500c9d1b5fSmillert 	fg->wmatch = 0;
35170c36c4eStedu 	fg->reversedSearch = 0;
35270c36c4eStedu 
35370c36c4eStedu 	/* Remove end-of-line character ('$'). */
35451098fefSeric 	if (fg->patternLen > 0 && pattern[fg->patternLen - 1] == '$') {
35570c36c4eStedu 		eol++;
35670c36c4eStedu 		fg->eol = 1;
35770c36c4eStedu 		fg->patternLen--;
35870c36c4eStedu 	}
35970c36c4eStedu 
36070c36c4eStedu 	/* Remove beginning-of-line character ('^'). */
36170c36c4eStedu 	if (pattern[0] == '^') {
36270c36c4eStedu 		bol++;
36370c36c4eStedu 		fg->bol = 1;
36470c36c4eStedu 		fg->patternLen--;
36570c36c4eStedu 	}
36670c36c4eStedu 
3670c9d1b5fSmillert 	/* Remove enclosing [[:<:]] and [[:>:]] (word match). */
36830c8a262Sotto 	if (wflag) {
36930c8a262Sotto 		/* basic re's use \( \), extended re's ( ) */
37030c8a262Sotto 		int extra = Eflag ? 1 : 2;
37130c8a262Sotto 		fg->patternLen -= 14 + 2 * extra;
37230c8a262Sotto 		fg->wmatch = 7 + extra;
37330c8a262Sotto 	} else if (fg->patternLen >= 14 &&
3740c9d1b5fSmillert 	    strncmp(pattern + fg->bol, "[[:<:]]", 7) == 0 &&
375394a0db6Smillert 	    strncmp(pattern + fg->bol + fg->patternLen - 7, "[[:>:]]", 7) == 0) {
3760c9d1b5fSmillert 		fg->patternLen -= 14;
3770c9d1b5fSmillert 		fg->wmatch = 7;
3780c9d1b5fSmillert 	}
3790c9d1b5fSmillert 
38070c36c4eStedu 	/*
3810c9d1b5fSmillert 	 * Copy pattern minus '^' and '$' characters as well as word
3820c9d1b5fSmillert 	 * match character classes at the beginning and ending of the
3830c9d1b5fSmillert 	 * string respectively.
38470c36c4eStedu 	 */
3850c9d1b5fSmillert 	fg->pattern = grep_malloc(fg->patternLen + 1);
3860c9d1b5fSmillert 	memcpy(fg->pattern, pattern + bol + fg->wmatch, fg->patternLen);
3870c9d1b5fSmillert 	fg->pattern[fg->patternLen] = '\0';
38870c36c4eStedu 
38970c36c4eStedu 	/* Look for ways to cheat...er...avoid the full regex engine. */
39070c36c4eStedu 	for (i = 0; i < fg->patternLen; i++)
39170c36c4eStedu 	{
392bcaf0f1eStedu 		switch (fg->pattern[i]) {
393bcaf0f1eStedu 		case '.':
39470c36c4eStedu 			hasDot = i;
39570c36c4eStedu 			if (i < fg->patternLen / 2) {
39603c8a690Sotto 				if (firstHalfDot < 0)
39770c36c4eStedu 					/* Closest dot to the beginning */
39870c36c4eStedu 					firstHalfDot = i;
39970c36c4eStedu 			} else {
40070c36c4eStedu 				/* Closest dot to the end of the pattern. */
40170c36c4eStedu 				lastHalfDot = i;
40270c36c4eStedu 				if (firstLastHalfDot < 0)
40370c36c4eStedu 					firstLastHalfDot = i;
40470c36c4eStedu 			}
405bcaf0f1eStedu 			break;
406bcaf0f1eStedu 		case '(': case ')':
407bcaf0f1eStedu 		case '{': case '}':
408bcaf0f1eStedu 			/* Special in BRE if preceded by '\\' */
409bcaf0f1eStedu 		case '?':
410bcaf0f1eStedu 		case '+':
411bcaf0f1eStedu 		case '|':
412bcaf0f1eStedu 			/* Not special in BRE. */
413bcaf0f1eStedu 			if (!Eflag)
414bcaf0f1eStedu 				goto nonspecial;
415bcaf0f1eStedu 		case '\\':
416bcaf0f1eStedu 		case '*':
417bcaf0f1eStedu 		case '[': case ']':
41870c36c4eStedu 			/* Free memory and let others know this is empty. */
41970c36c4eStedu 			free(fg->pattern);
42070c36c4eStedu 			fg->pattern = NULL;
42170c36c4eStedu 			return (-1);
422bcaf0f1eStedu 		default:
423bcaf0f1eStedu nonspecial:
424bcaf0f1eStedu 			if (iflag)
425bcaf0f1eStedu 				fg->pattern[i] = toupper(fg->pattern[i]);
426bcaf0f1eStedu 			break;
42770c36c4eStedu 		}
42870c36c4eStedu 	}
42970c36c4eStedu 
43070c36c4eStedu 	/*
43170c36c4eStedu 	 * Determine if a reverse search would be faster based on the placement
43270c36c4eStedu 	 * of the dots.
43370c36c4eStedu 	 */
43419f8bc08Sotto 	if ((!(lflag || cflag || oflag)) && ((!(bol || eol)) &&
43570c36c4eStedu 	    ((lastHalfDot) && ((firstHalfDot < 0) ||
43670c36c4eStedu 	    ((fg->patternLen - (lastHalfDot + 1)) < firstHalfDot))))) {
43770c36c4eStedu 		fg->reversedSearch = 1;
43870c36c4eStedu 		hasDot = fg->patternLen - (firstHalfDot < 0 ?
43970c36c4eStedu 		    firstLastHalfDot : firstHalfDot) - 1;
44070c36c4eStedu 		grep_revstr(fg->pattern, fg->patternLen);
44170c36c4eStedu 	}
44270c36c4eStedu 
44370c36c4eStedu 	/*
44470c36c4eStedu 	 * Normal Quick Search would require a shift based on the position the
44570c36c4eStedu 	 * next character after the comparison is within the pattern.  With
44670c36c4eStedu 	 * wildcards, the position of the last dot effects the maximum shift
44770c36c4eStedu 	 * distance.
44870c36c4eStedu 	 * The closer to the end the wild card is the slower the search.  A
44970c36c4eStedu 	 * reverse version of this algorithm would be useful for wildcards near
45070c36c4eStedu 	 * the end of the string.
45170c36c4eStedu 	 *
45270c36c4eStedu 	 * Examples:
45370c36c4eStedu 	 * Pattern	Max shift
45470c36c4eStedu 	 * -------	---------
45570c36c4eStedu 	 * this		5
45670c36c4eStedu 	 * .his		4
45770c36c4eStedu 	 * t.is		3
45870c36c4eStedu 	 * th.s		2
45970c36c4eStedu 	 * thi.		1
46070c36c4eStedu 	 */
46170c36c4eStedu 
46270c36c4eStedu 	/* Adjust the shift based on location of the last dot ('.'). */
46370c36c4eStedu 	shiftPatternLen = fg->patternLen - hasDot;
46470c36c4eStedu 
46570c36c4eStedu 	/* Preprocess pattern. */
46670c36c4eStedu 	for (i = 0; i <= UCHAR_MAX; i++)
46770c36c4eStedu 		fg->qsBc[i] = shiftPatternLen;
46870c36c4eStedu 	for (i = hasDot + 1; i < fg->patternLen; i++) {
46970c36c4eStedu 		fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
47070c36c4eStedu 		/*
47170c36c4eStedu 		 * If case is ignored, make the jump apply to both upper and
47270c36c4eStedu 		 * lower cased characters.  As the pattern is stored in upper
47370c36c4eStedu 		 * case, apply the same to the lower case equivalents.
47470c36c4eStedu 		 */
47570c36c4eStedu 		if (iflag)
47670c36c4eStedu 			fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
47770c36c4eStedu 	}
47870c36c4eStedu 
47970c36c4eStedu 	/*
48070c36c4eStedu 	 * Put pattern back to normal after pre-processing to allow for easy
48170c36c4eStedu 	 * comparisons later.
48270c36c4eStedu 	 */
48370c36c4eStedu 	if (fg->reversedSearch)
48470c36c4eStedu 		grep_revstr(fg->pattern, fg->patternLen);
48570c36c4eStedu 
48670c36c4eStedu 	return (0);
487f56cb323Stedu #endif
48870c36c4eStedu }
48970c36c4eStedu 
490809cad80Sotto /*
491809cad80Sotto  * Word boundaries using regular expressions are defined as the point
492809cad80Sotto  * of transition from a non-word char to a word char, or vice versa.
493809cad80Sotto  * This means that grep -w +a and grep -w a+ never match anything,
494809cad80Sotto  * because they lack a starting or ending transition, but grep -w a+b
495809cad80Sotto  * does match a line containing a+b.
496809cad80Sotto  */
4970c9d1b5fSmillert #define wmatch(d, l, s, e)	\
498809cad80Sotto 	((s == 0 || !isword(d[s-1])) && (e == l || !isword(d[e])) && \
499809cad80Sotto 	  e > s && isword(d[s]) && isword(d[e-1]))
5000c9d1b5fSmillert 
501674ee822Smillert static int
grep_search(fastgrep_t * fg,char * data,size_t dataLen,regmatch_t * pmatch,int flags)502475af40dStedu grep_search(fastgrep_t *fg, char *data, size_t dataLen, regmatch_t *pmatch,
503475af40dStedu     int flags)
50470c36c4eStedu {
505f56cb323Stedu #ifdef SMALL
506f56cb323Stedu 	return 0;
507f56cb323Stedu #else
508f7ac433eSaschrijver 	regoff_t j;
50970c36c4eStedu 	int rtrnVal = REG_NOMATCH;
51070c36c4eStedu 
511674ee822Smillert 	pmatch->rm_so = -1;
512674ee822Smillert 	pmatch->rm_eo = -1;
513674ee822Smillert 
51470c36c4eStedu 	/* No point in going farther if we do not have enough data. */
51570c36c4eStedu 	if (dataLen < fg->patternLen)
51670c36c4eStedu 		return (rtrnVal);
51770c36c4eStedu 
51870c36c4eStedu 	/* Only try once at the beginning or ending of the line. */
51970c36c4eStedu 	if (fg->bol || fg->eol) {
520475af40dStedu 		if (fg->bol && (flags & REG_NOTBOL))
521475af40dStedu 			return 0;
52270c36c4eStedu 		/* Simple text comparison. */
52370c36c4eStedu 		/* Verify data is >= pattern length before searching on it. */
52470c36c4eStedu 		if (dataLen >= fg->patternLen) {
52570c36c4eStedu 			/* Determine where in data to start search at. */
52670c36c4eStedu 			if (fg->eol)
52770c36c4eStedu 				j = dataLen - fg->patternLen;
52870c36c4eStedu 			else
52970c36c4eStedu 				j = 0;
53070c36c4eStedu 			if (!((fg->bol && fg->eol) && (dataLen != fg->patternLen)))
5310c9d1b5fSmillert 				if (grep_cmp(fg->pattern, data + j,
53257c37fdaSmillert 				    fg->patternLen)) {
533674ee822Smillert 					pmatch->rm_so = j;
534674ee822Smillert 					pmatch->rm_eo = j + fg->patternLen;
5350c9d1b5fSmillert 					if (!fg->wmatch || wmatch(data, dataLen,
5360c9d1b5fSmillert 					    pmatch->rm_so, pmatch->rm_eo))
5370c9d1b5fSmillert 						rtrnVal = 0;
538674ee822Smillert 				}
53970c36c4eStedu 		}
54070c36c4eStedu 	} else if (fg->reversedSearch) {
54170c36c4eStedu 		/* Quick Search algorithm. */
54288410190Smillert 		j = dataLen;
54388410190Smillert 		do {
54470c36c4eStedu 			if (grep_cmp(fg->pattern, data + j - fg->patternLen,
54557c37fdaSmillert 			    fg->patternLen)) {
546674ee822Smillert 				pmatch->rm_so = j - fg->patternLen;
547674ee822Smillert 				pmatch->rm_eo = j;
5480c9d1b5fSmillert 				if (!fg->wmatch || wmatch(data, dataLen,
5490c9d1b5fSmillert 				    pmatch->rm_so, pmatch->rm_eo)) {
5500c9d1b5fSmillert 					rtrnVal = 0;
55170c36c4eStedu 					break;
55270c36c4eStedu 				}
5530c9d1b5fSmillert 			}
55488410190Smillert 			/* Shift if within bounds, otherwise, we are done. */
55588410190Smillert 			if (j == fg->patternLen)
55688410190Smillert 				break;
55714774268Stedu 			j -= fg->qsBc[(unsigned char)data[j - fg->patternLen - 1]];
55888410190Smillert 		} while (j >= fg->patternLen);
55970c36c4eStedu 	} else {
56070c36c4eStedu 		/* Quick Search algorithm. */
56170c36c4eStedu 		j = 0;
56270c36c4eStedu 		do {
56357c37fdaSmillert 			if (grep_cmp(fg->pattern, data + j, fg->patternLen)) {
564674ee822Smillert 				pmatch->rm_so = j;
565674ee822Smillert 				pmatch->rm_eo = j + fg->patternLen;
5663d12d7eeSjaredy 				if (fg->patternLen == 0 || !fg->wmatch ||
5673d12d7eeSjaredy 				    wmatch(data, dataLen, pmatch->rm_so,
5683d12d7eeSjaredy 				    pmatch->rm_eo)) {
5690c9d1b5fSmillert 					rtrnVal = 0;
57070c36c4eStedu 					break;
57170c36c4eStedu 				}
5720c9d1b5fSmillert 			}
57370c36c4eStedu 
57470c36c4eStedu 			/* Shift if within bounds, otherwise, we are done. */
57570c36c4eStedu 			if (j + fg->patternLen == dataLen)
57670c36c4eStedu 				break;
57770c36c4eStedu 			else
57814774268Stedu 				j += fg->qsBc[(unsigned char)data[j + fg->patternLen]];
57970c36c4eStedu 		} while (j <= (dataLen - fg->patternLen));
58070c36c4eStedu 	}
58170c36c4eStedu 
58270c36c4eStedu 	return (rtrnVal);
583f56cb323Stedu #endif
58470c36c4eStedu }
58570c36c4eStedu 
58670c36c4eStedu 
587fe07e37bSderaadt void *
grep_malloc(size_t size)588fe07e37bSderaadt grep_malloc(size_t size)
589fe07e37bSderaadt {
590fe07e37bSderaadt 	void	*ptr;
591fe07e37bSderaadt 
592fe07e37bSderaadt 	if ((ptr = malloc(size)) == NULL)
593f91b5226Smillert 		err(2, "malloc");
594fe07e37bSderaadt 	return ptr;
595fe07e37bSderaadt }
596fe07e37bSderaadt 
597fe07e37bSderaadt void *
grep_calloc(size_t nmemb,size_t size)5981ed98fdfSderaadt grep_calloc(size_t nmemb, size_t size)
5991ed98fdfSderaadt {
6001ed98fdfSderaadt 	void	*ptr;
6011ed98fdfSderaadt 
6021ed98fdfSderaadt 	if ((ptr = calloc(nmemb, size)) == NULL)
6031ed98fdfSderaadt 		err(2, "calloc");
6041ed98fdfSderaadt 	return ptr;
6051ed98fdfSderaadt }
6061ed98fdfSderaadt 
6071ed98fdfSderaadt void *
grep_realloc(void * ptr,size_t size)608fe07e37bSderaadt grep_realloc(void *ptr, size_t size)
609fe07e37bSderaadt {
610fe07e37bSderaadt 	if ((ptr = realloc(ptr, size)) == NULL)
611f91b5226Smillert 		err(2, "realloc");
612fe07e37bSderaadt 	return ptr;
613fe07e37bSderaadt }
614fe07e37bSderaadt 
61562ab4a5fSderaadt void *
grep_reallocarray(void * ptr,size_t nmemb,size_t size)61662ab4a5fSderaadt grep_reallocarray(void *ptr, size_t nmemb, size_t size)
61762ab4a5fSderaadt {
61862ab4a5fSderaadt 	if ((ptr = reallocarray(ptr, nmemb, size)) == NULL)
61962ab4a5fSderaadt 		err(2, "reallocarray");
62062ab4a5fSderaadt 	return ptr;
62162ab4a5fSderaadt }
62262ab4a5fSderaadt 
623f56cb323Stedu #ifndef SMALL
62470c36c4eStedu /*
62557c37fdaSmillert  * Returns:	true on success, false on failure
62670c36c4eStedu  */
62757c37fdaSmillert static bool
grep_cmp(const char * pattern,const char * data,size_t len)62814774268Stedu grep_cmp(const char *pattern, const char *data, size_t len)
62970c36c4eStedu {
63057c37fdaSmillert 	size_t i;
63170c36c4eStedu 
63270c36c4eStedu 	for (i = 0; i < len; i++) {
633acf867c4Smillert 		if (((pattern[i] == data[i]) || (!Fflag && pattern[i] == '.'))
6340340d4f1Smmcc 		    || (iflag && pattern[i] == toupper((unsigned char)data[i])))
63570c36c4eStedu 			continue;
63657c37fdaSmillert 		return false;
63770c36c4eStedu 	}
63870c36c4eStedu 
63957c37fdaSmillert 	return true;
64070c36c4eStedu }
64170c36c4eStedu 
64270c36c4eStedu static void
grep_revstr(unsigned char * str,int len)64370c36c4eStedu grep_revstr(unsigned char *str, int len)
64470c36c4eStedu {
64570c36c4eStedu 	int i;
64670c36c4eStedu 	char c;
64770c36c4eStedu 
64870c36c4eStedu 	for (i = 0; i < len / 2; i++) {
64970c36c4eStedu 		c = str[i];
65070c36c4eStedu 		str[i] = str[len - i - 1];
65170c36c4eStedu 		str[len - i - 1] = c;
65270c36c4eStedu 	}
65370c36c4eStedu }
654f56cb323Stedu #endif
65570c36c4eStedu 
656fe07e37bSderaadt void
printline(str_t * line,int sep,regmatch_t * pmatch)65714774268Stedu printline(str_t *line, int sep, regmatch_t *pmatch)
658fe07e37bSderaadt {
659f4ec5703Sop 	int n;
660fe07e37bSderaadt 
661fe07e37bSderaadt 	n = 0;
662fe07e37bSderaadt 	if (!hflag) {
663fe07e37bSderaadt 		fputs(line->file, stdout);
6641fd6e0f2Sop 		if (nullflag)
6651fd6e0f2Sop 			putchar(0);
666f4ec5703Sop 		else
667fe07e37bSderaadt 			++n;
668fe07e37bSderaadt 	}
669fe07e37bSderaadt 	if (nflag) {
670f4ec5703Sop 		if (n)
671fe07e37bSderaadt 			putchar(sep);
6722f35e644Smmcc 		printf("%lld", line->line_no);
673fe07e37bSderaadt 		++n;
674fe07e37bSderaadt 	}
675fe07e37bSderaadt 	if (bflag) {
676f4ec5703Sop 		if (n)
677fe07e37bSderaadt 			putchar(sep);
678ced39754Stedu 		printf("%lld", (long long)line->off +
679ced39754Stedu 		    (pmatch ? pmatch->rm_so : 0));
680b02c2e6bSotto 		++n;
681fe07e37bSderaadt 	}
682f4ec5703Sop 	if (n)
683fe07e37bSderaadt 		putchar(sep);
68414774268Stedu 	if (pmatch)
68514774268Stedu 		fwrite(line->dat + pmatch->rm_so,
68614774268Stedu 		    pmatch->rm_eo - pmatch->rm_so, 1, stdout);
68714774268Stedu 	else
688fe07e37bSderaadt 		fwrite(line->dat, line->len, 1, stdout);
689fe07e37bSderaadt 	putchar('\n');
690fe07e37bSderaadt }
691