1*03848a0fSmillert /* $OpenBSD: util.c,v 1.68 2023/11/15 00:50:43 millert Exp $ */
2fd6e2b5bSderaadt
3fe07e37bSderaadt /*-
4fe07e37bSderaadt * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav
5fe07e37bSderaadt * All rights reserved.
6fe07e37bSderaadt *
7fe07e37bSderaadt * Redistribution and use in source and binary forms, with or without
8fe07e37bSderaadt * modification, are permitted provided that the following conditions
9fe07e37bSderaadt * are met:
10fe07e37bSderaadt * 1. Redistributions of source code must retain the above copyright
11fe07e37bSderaadt * notice, this list of conditions and the following disclaimer.
12fe07e37bSderaadt * 2. Redistributions in binary form must reproduce the above copyright
13fe07e37bSderaadt * notice, this list of conditions and the following disclaimer in the
14fe07e37bSderaadt * documentation and/or other materials provided with the distribution.
15fe07e37bSderaadt *
16fe07e37bSderaadt * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17fe07e37bSderaadt * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18fe07e37bSderaadt * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19fe07e37bSderaadt * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20fe07e37bSderaadt * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21fe07e37bSderaadt * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22fe07e37bSderaadt * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23fe07e37bSderaadt * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24fe07e37bSderaadt * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25fe07e37bSderaadt * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26fe07e37bSderaadt * SUCH DAMAGE.
27fe07e37bSderaadt */
28fe07e37bSderaadt
29fe07e37bSderaadt #include <sys/types.h>
30fe07e37bSderaadt #include <sys/stat.h>
31fe07e37bSderaadt
32fe07e37bSderaadt #include <ctype.h>
33fe07e37bSderaadt #include <err.h>
34fe07e37bSderaadt #include <errno.h>
35fe07e37bSderaadt #include <fts.h>
36fe07e37bSderaadt #include <regex.h>
3757c37fdaSmillert #include <stdbool.h>
38fe07e37bSderaadt #include <stdio.h>
39fe07e37bSderaadt #include <stdlib.h>
40fe07e37bSderaadt #include <string.h>
41fe07e37bSderaadt #include <unistd.h>
42fe07e37bSderaadt #include <zlib.h>
43fe07e37bSderaadt
44fe07e37bSderaadt #include "grep.h"
45fe07e37bSderaadt
46fe07e37bSderaadt /*
47fe07e37bSderaadt * Process a file line by line...
48fe07e37bSderaadt */
49fe07e37bSderaadt
50fe07e37bSderaadt static int linesqueued;
51f057df86Stedu static int procline(str_t *l, int);
52475af40dStedu static int grep_search(fastgrep_t *, char *, size_t, regmatch_t *pmatch, int);
53f56cb323Stedu #ifndef SMALL
5457c37fdaSmillert static bool grep_cmp(const char *, const char *, size_t);
5570c36c4eStedu static void grep_revstr(unsigned char *, int);
56f56cb323Stedu #endif
57fe07e37bSderaadt
58fe07e37bSderaadt int
grep_tree(char ** argv)59fe07e37bSderaadt grep_tree(char **argv)
60fe07e37bSderaadt {
61fe07e37bSderaadt FTS *fts;
62fe07e37bSderaadt FTSENT *p;
63fe07e37bSderaadt int c, fts_flags;
64e8a72117Sjca char *dot_argv[] = { ".", NULL };
65e8a72117Sjca char *path;
66e8a72117Sjca
67e8a72117Sjca /* default to . if no path given */
68e8a72117Sjca if (argv[0] == NULL)
69e8a72117Sjca argv = dot_argv;
70fe07e37bSderaadt
7140155c30Stedu c = 0;
72fe07e37bSderaadt
7340155c30Stedu fts_flags = FTS_PHYSICAL | FTS_NOSTAT | FTS_NOCHDIR;
74fe07e37bSderaadt
750536143aSmillert if (!(fts = fts_open(argv, fts_flags, NULL)))
76f91b5226Smillert err(2, NULL);
77fe07e37bSderaadt while ((p = fts_read(fts)) != NULL) {
78fe07e37bSderaadt switch (p->fts_info) {
79fe07e37bSderaadt case FTS_DNR:
80fe07e37bSderaadt break;
81fe07e37bSderaadt case FTS_ERR:
8220351096Smillert file_err = 1;
8320351096Smillert if(!sflag)
845ad04d35Sguenther warnc(p->fts_errno, "%s", p->fts_path);
85fe07e37bSderaadt break;
863a21e479Stedu case FTS_D:
87fe07e37bSderaadt case FTS_DP:
88fe07e37bSderaadt break;
89fe07e37bSderaadt default:
90e8a72117Sjca path = p->fts_path;
91e8a72117Sjca /* skip "./" if implied */
92e8a72117Sjca if (argv == dot_argv && p->fts_pathlen >= 2)
93e8a72117Sjca path += 2;
94483368fbSmartijn c |= procfile(path);
95fe07e37bSderaadt break;
96fe07e37bSderaadt }
97fe07e37bSderaadt }
984760cb14Sotto if (errno)
994760cb14Sotto err(2, "fts_read");
100f12e5d26Suebayasi fts_close(fts);
101fe07e37bSderaadt return c;
102fe07e37bSderaadt }
103fe07e37bSderaadt
104fe07e37bSderaadt int
procfile(char * fn)105fe07e37bSderaadt procfile(char *fn)
106fe07e37bSderaadt {
107fe07e37bSderaadt str_t ln;
108fe07e37bSderaadt file_t *f;
109483368fbSmartijn int t, z, nottext, overflow = 0;
110483368fbSmartijn unsigned long long c;
111fe07e37bSderaadt
112b5430e4dSpirofti mcount = mlimit;
113b5430e4dSpirofti
114fe07e37bSderaadt if (fn == NULL) {
115fe07e37bSderaadt fn = "(standard input)";
1163a21e479Stedu f = grep_fdopen(STDIN_FILENO);
117fe07e37bSderaadt } else {
1183a21e479Stedu f = grep_open(fn);
119fe07e37bSderaadt }
120fe07e37bSderaadt if (f == NULL) {
1213a21e479Stedu if (errno == EISDIR)
1223a21e479Stedu return 0;
12320351096Smillert file_err = 1;
124fe07e37bSderaadt if (!sflag)
125fe07e37bSderaadt warn("%s", fn);
126fe07e37bSderaadt return 0;
127fe07e37bSderaadt }
128f057df86Stedu
129f057df86Stedu nottext = grep_bin_file(f);
130f057df86Stedu if (nottext && binbehave == BIN_FILE_SKIP) {
131fe07e37bSderaadt grep_close(f);
132fe07e37bSderaadt return 0;
133fe07e37bSderaadt }
134fe07e37bSderaadt
135fe07e37bSderaadt ln.file = fn;
136e06eeb70Stedu if (labelname)
137e06eeb70Stedu ln.file = (char *)labelname;
138fe07e37bSderaadt ln.line_no = 0;
1391d5054b0Sespie ln.len = 0;
140fe07e37bSderaadt linesqueued = 0;
1412eb140a3Sjaredy tail = 0;
142fe07e37bSderaadt ln.off = -1;
143fe07e37bSderaadt
144fe07e37bSderaadt if (Bflag > 0)
145fe07e37bSderaadt initqueue();
14699210d50Sotto for (c = 0; c == 0 || !(lflag || qflag); ) {
147b5430e4dSpirofti if (mflag && mlimit == 0)
148b5430e4dSpirofti break;
149fe07e37bSderaadt ln.off += ln.len + 1;
150fe07e37bSderaadt if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL)
151fe07e37bSderaadt break;
152fe07e37bSderaadt if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
153fe07e37bSderaadt --ln.len;
154fe07e37bSderaadt ln.line_no++;
155fe07e37bSderaadt
156fe07e37bSderaadt z = tail;
157fe07e37bSderaadt
158f057df86Stedu if ((t = procline(&ln, nottext)) == 0 && Bflag > 0 && z == 0) {
159fe07e37bSderaadt enqueue(&ln);
160fe07e37bSderaadt linesqueued++;
161fe07e37bSderaadt }
162483368fbSmartijn if (ULLONG_MAX - c < (unsigned long long)t)
163483368fbSmartijn overflow = 1;
164483368fbSmartijn else
165fe07e37bSderaadt c += t;
166eda4449cSdv if (mflag && mcount <= 0 && tail <= 0)
167b5430e4dSpirofti break;
168fe07e37bSderaadt }
169fe07e37bSderaadt if (Bflag > 0)
170fe07e37bSderaadt clearqueue();
171fe07e37bSderaadt grep_close(f);
172fe07e37bSderaadt
173fe07e37bSderaadt if (cflag) {
174fe07e37bSderaadt if (!hflag)
1751fd6e0f2Sop printf("%s%c", ln.file, nullflag ? '\0' : ':');
176483368fbSmartijn printf("%llu%s\n", c, overflow ? "+" : "");
177fe07e37bSderaadt }
178fe07e37bSderaadt if (lflag && c != 0)
1791fd6e0f2Sop printf("%s%c", fn, nullflag ? '\0' : '\n');
180fe07e37bSderaadt if (Lflag && c == 0)
1811fd6e0f2Sop printf("%s%c", fn, nullflag ? '\0' : '\n');
182f057df86Stedu if (c && !cflag && !lflag && !Lflag &&
1830c0c855bStedu binbehave == BIN_FILE_BIN && nottext && !qflag)
184f057df86Stedu printf("Binary file %s matches\n", fn);
185f057df86Stedu
186483368fbSmartijn return overflow || c != 0;
187fe07e37bSderaadt }
188fe07e37bSderaadt
189fe07e37bSderaadt
190fe07e37bSderaadt /*
191fe07e37bSderaadt * Process an individual line in a file. Return non-zero if it matches.
192fe07e37bSderaadt */
193fe07e37bSderaadt
1946cd4fad2Sderaadt #define isword(x) (isalnum((unsigned char)x) || (x) == '_')
195fe07e37bSderaadt
196fe07e37bSderaadt static int
procline(str_t * l,int nottext)197f057df86Stedu procline(str_t *l, int nottext)
198fe07e37bSderaadt {
19925f2b9c3Stedu regmatch_t pmatch = { 0 };
200*03848a0fSmillert int c, i, r, counted;
201f7ac433eSaschrijver regoff_t offset;
202f7ac433eSaschrijver
203f7ac433eSaschrijver /* size_t will be converted to regoff_t. ssize_t is guaranteed to fit
204f7ac433eSaschrijver * into regoff_t */
205f7ac433eSaschrijver if (l->len > SSIZE_MAX) {
206f7ac433eSaschrijver errx(2, "Line is too big to process");
207f7ac433eSaschrijver }
208fe07e37bSderaadt
20914774268Stedu c = 0;
21014774268Stedu i = 0;
211*03848a0fSmillert counted = 0;
212fe07e37bSderaadt if (matchall) {
2131b60208bStedu c = 1;
214fe07e37bSderaadt goto print;
215fe07e37bSderaadt }
216eda4449cSdv if (mflag && mcount <= 0)
217eda4449cSdv goto print;
218fe07e37bSderaadt
21914774268Stedu for (i = 0; i < patterns; i++) {
22014774268Stedu offset = 0;
22114774268Stedu redo:
2220c9d1b5fSmillert if (fg_pattern[i].pattern) {
223475af40dStedu int flags = 0;
224475af40dStedu if (offset)
225475af40dStedu flags |= REG_NOTBOL;
22614774268Stedu r = grep_search(&fg_pattern[i], l->dat + offset,
227475af40dStedu l->len - offset, &pmatch, flags);
22814774268Stedu pmatch.rm_so += offset;
22914774268Stedu pmatch.rm_eo += offset;
2300c9d1b5fSmillert } else {
231475af40dStedu int flags = eflags;
232475af40dStedu if (offset)
233475af40dStedu flags |= REG_NOTBOL;
23414774268Stedu pmatch.rm_so = offset;
235cbb0f937Sotto pmatch.rm_eo = l->len;
236475af40dStedu r = regexec(&r_pattern[i], l->dat, 1, &pmatch, flags);
237fe07e37bSderaadt }
2380c9d1b5fSmillert if (r == 0 && xflag) {
239fe07e37bSderaadt if (pmatch.rm_so != 0 || pmatch.rm_eo != l->len)
240fe07e37bSderaadt r = REG_NOMATCH;
241fe07e37bSderaadt }
242c85fc9dcSdhartmei if (r == 0) {
24314774268Stedu c = 1;
24431a151d0Smillert if (oflag && pmatch.rm_so != pmatch.rm_eo)
24514774268Stedu goto print;
246fe07e37bSderaadt break;
247fe07e37bSderaadt }
248fe07e37bSderaadt }
24914774268Stedu if (oflag)
25014774268Stedu return c;
25114774268Stedu print:
252c85fc9dcSdhartmei if (vflag)
253c85fc9dcSdhartmei c = !c;
254fe07e37bSderaadt
255*03848a0fSmillert /* Count the matches if there is a match limit (but only once). */
256*03848a0fSmillert if (mflag && !counted) {
257b5430e4dSpirofti mcount -= c;
258*03848a0fSmillert counted = 1;
259*03848a0fSmillert }
260b5430e4dSpirofti
261f057df86Stedu if (c && binbehave == BIN_FILE_BIN && nottext)
262f057df86Stedu return c; /* Binary file */
263f057df86Stedu
264fe07e37bSderaadt if ((tail > 0 || c) && !cflag && !qflag) {
265fe07e37bSderaadt if (c) {
2662774c493Sotto if (first > 0 && tail == 0 && (Aflag || (Bflag &&
2672774c493Sotto Bflag < linesqueued)))
268fe07e37bSderaadt printf("--\n");
269fe07e37bSderaadt first = 1;
270fe07e37bSderaadt tail = Aflag;
271fe07e37bSderaadt if (Bflag > 0)
272fe07e37bSderaadt printqueue();
273fe07e37bSderaadt linesqueued = 0;
27414774268Stedu printline(l, ':', oflag ? &pmatch : NULL);
275fe07e37bSderaadt } else {
27614774268Stedu printline(l, '-', oflag ? &pmatch : NULL);
277fe07e37bSderaadt tail--;
278fe07e37bSderaadt }
279fe07e37bSderaadt }
28014774268Stedu if (oflag && !matchall) {
28114774268Stedu offset = pmatch.rm_eo;
28214774268Stedu goto redo;
28314774268Stedu }
284fe07e37bSderaadt return c;
285fe07e37bSderaadt }
286fe07e37bSderaadt
287f56cb323Stedu #ifndef SMALL
2885c151985Sotto void
fgrepcomp(fastgrep_t * fg,const unsigned char * pattern)2896cd4fad2Sderaadt fgrepcomp(fastgrep_t *fg, const unsigned char *pattern)
290acf867c4Smillert {
291acf867c4Smillert int i;
292acf867c4Smillert
293acf867c4Smillert /* Initialize. */
294acf867c4Smillert fg->patternLen = strlen(pattern);
295acf867c4Smillert fg->bol = 0;
296acf867c4Smillert fg->eol = 0;
297acf867c4Smillert fg->wmatch = wflag;
298acf867c4Smillert fg->reversedSearch = 0;
299acf867c4Smillert
300acf867c4Smillert /*
301acf867c4Smillert * Make a copy and upper case it for later if in -i mode,
302acf867c4Smillert * else just copy the pointer.
303acf867c4Smillert */
304acf867c4Smillert if (iflag) {
305acf867c4Smillert fg->pattern = grep_malloc(fg->patternLen + 1);
306acf867c4Smillert for (i = 0; i < fg->patternLen; i++)
307acf867c4Smillert fg->pattern[i] = toupper(pattern[i]);
308acf867c4Smillert fg->pattern[fg->patternLen] = '\0';
309acf867c4Smillert } else
310aeda29f3Sderaadt fg->pattern = (unsigned char *)pattern; /* really const */
311acf867c4Smillert
312acf867c4Smillert /* Preprocess pattern. */
313acf867c4Smillert for (i = 0; i <= UCHAR_MAX; i++)
314acf867c4Smillert fg->qsBc[i] = fg->patternLen;
315acf867c4Smillert for (i = 1; i < fg->patternLen; i++) {
316acf867c4Smillert fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
317acf867c4Smillert /*
318acf867c4Smillert * If case is ignored, make the jump apply to both upper and
319acf867c4Smillert * lower cased characters. As the pattern is stored in upper
320acf867c4Smillert * case, apply the same to the lower case equivalents.
321acf867c4Smillert */
322acf867c4Smillert if (iflag)
323acf867c4Smillert fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
324acf867c4Smillert }
325acf867c4Smillert }
326f56cb323Stedu #endif
327acf867c4Smillert
328acf867c4Smillert /*
329acf867c4Smillert * Returns: -1 on failure, 0 on success
330acf867c4Smillert */
331acf867c4Smillert int
fastcomp(fastgrep_t * fg,const char * pattern)33270c36c4eStedu fastcomp(fastgrep_t *fg, const char *pattern)
33370c36c4eStedu {
334f56cb323Stedu #ifdef SMALL
335f56cb323Stedu return -1;
336f56cb323Stedu #else
33770c36c4eStedu int i;
33870c36c4eStedu int bol = 0;
33970c36c4eStedu int eol = 0;
34070c36c4eStedu int shiftPatternLen;
34170c36c4eStedu int hasDot = 0;
34270c36c4eStedu int firstHalfDot = -1;
34370c36c4eStedu int firstLastHalfDot = -1;
34470c36c4eStedu int lastHalfDot = 0;
34570c36c4eStedu
34670c36c4eStedu /* Initialize. */
347aeda29f3Sderaadt fg->patternLen = strlen(pattern);
34870c36c4eStedu fg->bol = 0;
34970c36c4eStedu fg->eol = 0;
3500c9d1b5fSmillert fg->wmatch = 0;
35170c36c4eStedu fg->reversedSearch = 0;
35270c36c4eStedu
35370c36c4eStedu /* Remove end-of-line character ('$'). */
35451098fefSeric if (fg->patternLen > 0 && pattern[fg->patternLen - 1] == '$') {
35570c36c4eStedu eol++;
35670c36c4eStedu fg->eol = 1;
35770c36c4eStedu fg->patternLen--;
35870c36c4eStedu }
35970c36c4eStedu
36070c36c4eStedu /* Remove beginning-of-line character ('^'). */
36170c36c4eStedu if (pattern[0] == '^') {
36270c36c4eStedu bol++;
36370c36c4eStedu fg->bol = 1;
36470c36c4eStedu fg->patternLen--;
36570c36c4eStedu }
36670c36c4eStedu
3670c9d1b5fSmillert /* Remove enclosing [[:<:]] and [[:>:]] (word match). */
36830c8a262Sotto if (wflag) {
36930c8a262Sotto /* basic re's use \( \), extended re's ( ) */
37030c8a262Sotto int extra = Eflag ? 1 : 2;
37130c8a262Sotto fg->patternLen -= 14 + 2 * extra;
37230c8a262Sotto fg->wmatch = 7 + extra;
37330c8a262Sotto } else if (fg->patternLen >= 14 &&
3740c9d1b5fSmillert strncmp(pattern + fg->bol, "[[:<:]]", 7) == 0 &&
375394a0db6Smillert strncmp(pattern + fg->bol + fg->patternLen - 7, "[[:>:]]", 7) == 0) {
3760c9d1b5fSmillert fg->patternLen -= 14;
3770c9d1b5fSmillert fg->wmatch = 7;
3780c9d1b5fSmillert }
3790c9d1b5fSmillert
38070c36c4eStedu /*
3810c9d1b5fSmillert * Copy pattern minus '^' and '$' characters as well as word
3820c9d1b5fSmillert * match character classes at the beginning and ending of the
3830c9d1b5fSmillert * string respectively.
38470c36c4eStedu */
3850c9d1b5fSmillert fg->pattern = grep_malloc(fg->patternLen + 1);
3860c9d1b5fSmillert memcpy(fg->pattern, pattern + bol + fg->wmatch, fg->patternLen);
3870c9d1b5fSmillert fg->pattern[fg->patternLen] = '\0';
38870c36c4eStedu
38970c36c4eStedu /* Look for ways to cheat...er...avoid the full regex engine. */
39070c36c4eStedu for (i = 0; i < fg->patternLen; i++)
39170c36c4eStedu {
392bcaf0f1eStedu switch (fg->pattern[i]) {
393bcaf0f1eStedu case '.':
39470c36c4eStedu hasDot = i;
39570c36c4eStedu if (i < fg->patternLen / 2) {
39603c8a690Sotto if (firstHalfDot < 0)
39770c36c4eStedu /* Closest dot to the beginning */
39870c36c4eStedu firstHalfDot = i;
39970c36c4eStedu } else {
40070c36c4eStedu /* Closest dot to the end of the pattern. */
40170c36c4eStedu lastHalfDot = i;
40270c36c4eStedu if (firstLastHalfDot < 0)
40370c36c4eStedu firstLastHalfDot = i;
40470c36c4eStedu }
405bcaf0f1eStedu break;
406bcaf0f1eStedu case '(': case ')':
407bcaf0f1eStedu case '{': case '}':
408bcaf0f1eStedu /* Special in BRE if preceded by '\\' */
409bcaf0f1eStedu case '?':
410bcaf0f1eStedu case '+':
411bcaf0f1eStedu case '|':
412bcaf0f1eStedu /* Not special in BRE. */
413bcaf0f1eStedu if (!Eflag)
414bcaf0f1eStedu goto nonspecial;
415bcaf0f1eStedu case '\\':
416bcaf0f1eStedu case '*':
417bcaf0f1eStedu case '[': case ']':
41870c36c4eStedu /* Free memory and let others know this is empty. */
41970c36c4eStedu free(fg->pattern);
42070c36c4eStedu fg->pattern = NULL;
42170c36c4eStedu return (-1);
422bcaf0f1eStedu default:
423bcaf0f1eStedu nonspecial:
424bcaf0f1eStedu if (iflag)
425bcaf0f1eStedu fg->pattern[i] = toupper(fg->pattern[i]);
426bcaf0f1eStedu break;
42770c36c4eStedu }
42870c36c4eStedu }
42970c36c4eStedu
43070c36c4eStedu /*
43170c36c4eStedu * Determine if a reverse search would be faster based on the placement
43270c36c4eStedu * of the dots.
43370c36c4eStedu */
43419f8bc08Sotto if ((!(lflag || cflag || oflag)) && ((!(bol || eol)) &&
43570c36c4eStedu ((lastHalfDot) && ((firstHalfDot < 0) ||
43670c36c4eStedu ((fg->patternLen - (lastHalfDot + 1)) < firstHalfDot))))) {
43770c36c4eStedu fg->reversedSearch = 1;
43870c36c4eStedu hasDot = fg->patternLen - (firstHalfDot < 0 ?
43970c36c4eStedu firstLastHalfDot : firstHalfDot) - 1;
44070c36c4eStedu grep_revstr(fg->pattern, fg->patternLen);
44170c36c4eStedu }
44270c36c4eStedu
44370c36c4eStedu /*
44470c36c4eStedu * Normal Quick Search would require a shift based on the position the
44570c36c4eStedu * next character after the comparison is within the pattern. With
44670c36c4eStedu * wildcards, the position of the last dot effects the maximum shift
44770c36c4eStedu * distance.
44870c36c4eStedu * The closer to the end the wild card is the slower the search. A
44970c36c4eStedu * reverse version of this algorithm would be useful for wildcards near
45070c36c4eStedu * the end of the string.
45170c36c4eStedu *
45270c36c4eStedu * Examples:
45370c36c4eStedu * Pattern Max shift
45470c36c4eStedu * ------- ---------
45570c36c4eStedu * this 5
45670c36c4eStedu * .his 4
45770c36c4eStedu * t.is 3
45870c36c4eStedu * th.s 2
45970c36c4eStedu * thi. 1
46070c36c4eStedu */
46170c36c4eStedu
46270c36c4eStedu /* Adjust the shift based on location of the last dot ('.'). */
46370c36c4eStedu shiftPatternLen = fg->patternLen - hasDot;
46470c36c4eStedu
46570c36c4eStedu /* Preprocess pattern. */
46670c36c4eStedu for (i = 0; i <= UCHAR_MAX; i++)
46770c36c4eStedu fg->qsBc[i] = shiftPatternLen;
46870c36c4eStedu for (i = hasDot + 1; i < fg->patternLen; i++) {
46970c36c4eStedu fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
47070c36c4eStedu /*
47170c36c4eStedu * If case is ignored, make the jump apply to both upper and
47270c36c4eStedu * lower cased characters. As the pattern is stored in upper
47370c36c4eStedu * case, apply the same to the lower case equivalents.
47470c36c4eStedu */
47570c36c4eStedu if (iflag)
47670c36c4eStedu fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
47770c36c4eStedu }
47870c36c4eStedu
47970c36c4eStedu /*
48070c36c4eStedu * Put pattern back to normal after pre-processing to allow for easy
48170c36c4eStedu * comparisons later.
48270c36c4eStedu */
48370c36c4eStedu if (fg->reversedSearch)
48470c36c4eStedu grep_revstr(fg->pattern, fg->patternLen);
48570c36c4eStedu
48670c36c4eStedu return (0);
487f56cb323Stedu #endif
48870c36c4eStedu }
48970c36c4eStedu
490809cad80Sotto /*
491809cad80Sotto * Word boundaries using regular expressions are defined as the point
492809cad80Sotto * of transition from a non-word char to a word char, or vice versa.
493809cad80Sotto * This means that grep -w +a and grep -w a+ never match anything,
494809cad80Sotto * because they lack a starting or ending transition, but grep -w a+b
495809cad80Sotto * does match a line containing a+b.
496809cad80Sotto */
4970c9d1b5fSmillert #define wmatch(d, l, s, e) \
498809cad80Sotto ((s == 0 || !isword(d[s-1])) && (e == l || !isword(d[e])) && \
499809cad80Sotto e > s && isword(d[s]) && isword(d[e-1]))
5000c9d1b5fSmillert
501674ee822Smillert static int
grep_search(fastgrep_t * fg,char * data,size_t dataLen,regmatch_t * pmatch,int flags)502475af40dStedu grep_search(fastgrep_t *fg, char *data, size_t dataLen, regmatch_t *pmatch,
503475af40dStedu int flags)
50470c36c4eStedu {
505f56cb323Stedu #ifdef SMALL
506f56cb323Stedu return 0;
507f56cb323Stedu #else
508f7ac433eSaschrijver regoff_t j;
50970c36c4eStedu int rtrnVal = REG_NOMATCH;
51070c36c4eStedu
511674ee822Smillert pmatch->rm_so = -1;
512674ee822Smillert pmatch->rm_eo = -1;
513674ee822Smillert
51470c36c4eStedu /* No point in going farther if we do not have enough data. */
51570c36c4eStedu if (dataLen < fg->patternLen)
51670c36c4eStedu return (rtrnVal);
51770c36c4eStedu
51870c36c4eStedu /* Only try once at the beginning or ending of the line. */
51970c36c4eStedu if (fg->bol || fg->eol) {
520475af40dStedu if (fg->bol && (flags & REG_NOTBOL))
521475af40dStedu return 0;
52270c36c4eStedu /* Simple text comparison. */
52370c36c4eStedu /* Verify data is >= pattern length before searching on it. */
52470c36c4eStedu if (dataLen >= fg->patternLen) {
52570c36c4eStedu /* Determine where in data to start search at. */
52670c36c4eStedu if (fg->eol)
52770c36c4eStedu j = dataLen - fg->patternLen;
52870c36c4eStedu else
52970c36c4eStedu j = 0;
53070c36c4eStedu if (!((fg->bol && fg->eol) && (dataLen != fg->patternLen)))
5310c9d1b5fSmillert if (grep_cmp(fg->pattern, data + j,
53257c37fdaSmillert fg->patternLen)) {
533674ee822Smillert pmatch->rm_so = j;
534674ee822Smillert pmatch->rm_eo = j + fg->patternLen;
5350c9d1b5fSmillert if (!fg->wmatch || wmatch(data, dataLen,
5360c9d1b5fSmillert pmatch->rm_so, pmatch->rm_eo))
5370c9d1b5fSmillert rtrnVal = 0;
538674ee822Smillert }
53970c36c4eStedu }
54070c36c4eStedu } else if (fg->reversedSearch) {
54170c36c4eStedu /* Quick Search algorithm. */
54288410190Smillert j = dataLen;
54388410190Smillert do {
54470c36c4eStedu if (grep_cmp(fg->pattern, data + j - fg->patternLen,
54557c37fdaSmillert fg->patternLen)) {
546674ee822Smillert pmatch->rm_so = j - fg->patternLen;
547674ee822Smillert pmatch->rm_eo = j;
5480c9d1b5fSmillert if (!fg->wmatch || wmatch(data, dataLen,
5490c9d1b5fSmillert pmatch->rm_so, pmatch->rm_eo)) {
5500c9d1b5fSmillert rtrnVal = 0;
55170c36c4eStedu break;
55270c36c4eStedu }
5530c9d1b5fSmillert }
55488410190Smillert /* Shift if within bounds, otherwise, we are done. */
55588410190Smillert if (j == fg->patternLen)
55688410190Smillert break;
55714774268Stedu j -= fg->qsBc[(unsigned char)data[j - fg->patternLen - 1]];
55888410190Smillert } while (j >= fg->patternLen);
55970c36c4eStedu } else {
56070c36c4eStedu /* Quick Search algorithm. */
56170c36c4eStedu j = 0;
56270c36c4eStedu do {
56357c37fdaSmillert if (grep_cmp(fg->pattern, data + j, fg->patternLen)) {
564674ee822Smillert pmatch->rm_so = j;
565674ee822Smillert pmatch->rm_eo = j + fg->patternLen;
5663d12d7eeSjaredy if (fg->patternLen == 0 || !fg->wmatch ||
5673d12d7eeSjaredy wmatch(data, dataLen, pmatch->rm_so,
5683d12d7eeSjaredy pmatch->rm_eo)) {
5690c9d1b5fSmillert rtrnVal = 0;
57070c36c4eStedu break;
57170c36c4eStedu }
5720c9d1b5fSmillert }
57370c36c4eStedu
57470c36c4eStedu /* Shift if within bounds, otherwise, we are done. */
57570c36c4eStedu if (j + fg->patternLen == dataLen)
57670c36c4eStedu break;
57770c36c4eStedu else
57814774268Stedu j += fg->qsBc[(unsigned char)data[j + fg->patternLen]];
57970c36c4eStedu } while (j <= (dataLen - fg->patternLen));
58070c36c4eStedu }
58170c36c4eStedu
58270c36c4eStedu return (rtrnVal);
583f56cb323Stedu #endif
58470c36c4eStedu }
58570c36c4eStedu
58670c36c4eStedu
587fe07e37bSderaadt void *
grep_malloc(size_t size)588fe07e37bSderaadt grep_malloc(size_t size)
589fe07e37bSderaadt {
590fe07e37bSderaadt void *ptr;
591fe07e37bSderaadt
592fe07e37bSderaadt if ((ptr = malloc(size)) == NULL)
593f91b5226Smillert err(2, "malloc");
594fe07e37bSderaadt return ptr;
595fe07e37bSderaadt }
596fe07e37bSderaadt
597fe07e37bSderaadt void *
grep_calloc(size_t nmemb,size_t size)5981ed98fdfSderaadt grep_calloc(size_t nmemb, size_t size)
5991ed98fdfSderaadt {
6001ed98fdfSderaadt void *ptr;
6011ed98fdfSderaadt
6021ed98fdfSderaadt if ((ptr = calloc(nmemb, size)) == NULL)
6031ed98fdfSderaadt err(2, "calloc");
6041ed98fdfSderaadt return ptr;
6051ed98fdfSderaadt }
6061ed98fdfSderaadt
6071ed98fdfSderaadt void *
grep_realloc(void * ptr,size_t size)608fe07e37bSderaadt grep_realloc(void *ptr, size_t size)
609fe07e37bSderaadt {
610fe07e37bSderaadt if ((ptr = realloc(ptr, size)) == NULL)
611f91b5226Smillert err(2, "realloc");
612fe07e37bSderaadt return ptr;
613fe07e37bSderaadt }
614fe07e37bSderaadt
61562ab4a5fSderaadt void *
grep_reallocarray(void * ptr,size_t nmemb,size_t size)61662ab4a5fSderaadt grep_reallocarray(void *ptr, size_t nmemb, size_t size)
61762ab4a5fSderaadt {
61862ab4a5fSderaadt if ((ptr = reallocarray(ptr, nmemb, size)) == NULL)
61962ab4a5fSderaadt err(2, "reallocarray");
62062ab4a5fSderaadt return ptr;
62162ab4a5fSderaadt }
62262ab4a5fSderaadt
623f56cb323Stedu #ifndef SMALL
62470c36c4eStedu /*
62557c37fdaSmillert * Returns: true on success, false on failure
62670c36c4eStedu */
62757c37fdaSmillert static bool
grep_cmp(const char * pattern,const char * data,size_t len)62814774268Stedu grep_cmp(const char *pattern, const char *data, size_t len)
62970c36c4eStedu {
63057c37fdaSmillert size_t i;
63170c36c4eStedu
63270c36c4eStedu for (i = 0; i < len; i++) {
633acf867c4Smillert if (((pattern[i] == data[i]) || (!Fflag && pattern[i] == '.'))
6340340d4f1Smmcc || (iflag && pattern[i] == toupper((unsigned char)data[i])))
63570c36c4eStedu continue;
63657c37fdaSmillert return false;
63770c36c4eStedu }
63870c36c4eStedu
63957c37fdaSmillert return true;
64070c36c4eStedu }
64170c36c4eStedu
64270c36c4eStedu static void
grep_revstr(unsigned char * str,int len)64370c36c4eStedu grep_revstr(unsigned char *str, int len)
64470c36c4eStedu {
64570c36c4eStedu int i;
64670c36c4eStedu char c;
64770c36c4eStedu
64870c36c4eStedu for (i = 0; i < len / 2; i++) {
64970c36c4eStedu c = str[i];
65070c36c4eStedu str[i] = str[len - i - 1];
65170c36c4eStedu str[len - i - 1] = c;
65270c36c4eStedu }
65370c36c4eStedu }
654f56cb323Stedu #endif
65570c36c4eStedu
656fe07e37bSderaadt void
printline(str_t * line,int sep,regmatch_t * pmatch)65714774268Stedu printline(str_t *line, int sep, regmatch_t *pmatch)
658fe07e37bSderaadt {
659f4ec5703Sop int n;
660fe07e37bSderaadt
661fe07e37bSderaadt n = 0;
662fe07e37bSderaadt if (!hflag) {
663fe07e37bSderaadt fputs(line->file, stdout);
6641fd6e0f2Sop if (nullflag)
6651fd6e0f2Sop putchar(0);
666f4ec5703Sop else
667fe07e37bSderaadt ++n;
668fe07e37bSderaadt }
669fe07e37bSderaadt if (nflag) {
670f4ec5703Sop if (n)
671fe07e37bSderaadt putchar(sep);
6722f35e644Smmcc printf("%lld", line->line_no);
673fe07e37bSderaadt ++n;
674fe07e37bSderaadt }
675fe07e37bSderaadt if (bflag) {
676f4ec5703Sop if (n)
677fe07e37bSderaadt putchar(sep);
678ced39754Stedu printf("%lld", (long long)line->off +
679ced39754Stedu (pmatch ? pmatch->rm_so : 0));
680b02c2e6bSotto ++n;
681fe07e37bSderaadt }
682f4ec5703Sop if (n)
683fe07e37bSderaadt putchar(sep);
68414774268Stedu if (pmatch)
68514774268Stedu fwrite(line->dat + pmatch->rm_so,
68614774268Stedu pmatch->rm_eo - pmatch->rm_so, 1, stdout);
68714774268Stedu else
688fe07e37bSderaadt fwrite(line->dat, line->len, 1, stdout);
689fe07e37bSderaadt putchar('\n');
690fe07e37bSderaadt }
691