1*d794ecc9SDavid van Moolenbroek /* $OpenBSD: util.c,v 1.48 2014/05/20 01:25:23 guenther Exp $ */
2*d794ecc9SDavid van Moolenbroek
3*d794ecc9SDavid van Moolenbroek /*-
4*d794ecc9SDavid van Moolenbroek * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav
5*d794ecc9SDavid van Moolenbroek * All rights reserved.
6*d794ecc9SDavid van Moolenbroek *
7*d794ecc9SDavid van Moolenbroek * Redistribution and use in source and binary forms, with or without
8*d794ecc9SDavid van Moolenbroek * modification, are permitted provided that the following conditions
9*d794ecc9SDavid van Moolenbroek * are met:
10*d794ecc9SDavid van Moolenbroek * 1. Redistributions of source code must retain the above copyright
11*d794ecc9SDavid van Moolenbroek * notice, this list of conditions and the following disclaimer.
12*d794ecc9SDavid van Moolenbroek * 2. Redistributions in binary form must reproduce the above copyright
13*d794ecc9SDavid van Moolenbroek * notice, this list of conditions and the following disclaimer in the
14*d794ecc9SDavid van Moolenbroek * documentation and/or other materials provided with the distribution.
15*d794ecc9SDavid van Moolenbroek *
16*d794ecc9SDavid van Moolenbroek * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17*d794ecc9SDavid van Moolenbroek * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18*d794ecc9SDavid van Moolenbroek * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19*d794ecc9SDavid van Moolenbroek * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20*d794ecc9SDavid van Moolenbroek * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21*d794ecc9SDavid van Moolenbroek * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22*d794ecc9SDavid van Moolenbroek * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23*d794ecc9SDavid van Moolenbroek * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24*d794ecc9SDavid van Moolenbroek * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25*d794ecc9SDavid van Moolenbroek * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26*d794ecc9SDavid van Moolenbroek * SUCH DAMAGE.
27*d794ecc9SDavid van Moolenbroek */
28*d794ecc9SDavid van Moolenbroek
29*d794ecc9SDavid van Moolenbroek #include <sys/types.h>
30*d794ecc9SDavid van Moolenbroek #include <sys/stat.h>
31*d794ecc9SDavid van Moolenbroek
32*d794ecc9SDavid van Moolenbroek #include <ctype.h>
33*d794ecc9SDavid van Moolenbroek #include <err.h>
34*d794ecc9SDavid van Moolenbroek #include <errno.h>
35*d794ecc9SDavid van Moolenbroek #include <fts.h>
36*d794ecc9SDavid van Moolenbroek #include <regex.h>
37*d794ecc9SDavid van Moolenbroek #include <stdio.h>
38*d794ecc9SDavid van Moolenbroek #include <stdlib.h>
39*d794ecc9SDavid van Moolenbroek #include <string.h>
40*d794ecc9SDavid van Moolenbroek #include <unistd.h>
41*d794ecc9SDavid van Moolenbroek #include <zlib.h>
42*d794ecc9SDavid van Moolenbroek
43*d794ecc9SDavid van Moolenbroek #include "grep.h"
44*d794ecc9SDavid van Moolenbroek
45*d794ecc9SDavid van Moolenbroek /*
46*d794ecc9SDavid van Moolenbroek * Process a file line by line...
47*d794ecc9SDavid van Moolenbroek */
48*d794ecc9SDavid van Moolenbroek
49*d794ecc9SDavid van Moolenbroek static int linesqueued;
50*d794ecc9SDavid van Moolenbroek static int procline(str_t *l, int);
51*d794ecc9SDavid van Moolenbroek static int grep_search(fastgrep_t *, unsigned char *, size_t,
52*d794ecc9SDavid van Moolenbroek regmatch_t *pmatch);
53*d794ecc9SDavid van Moolenbroek #ifndef SMALL
54*d794ecc9SDavid van Moolenbroek static int grep_cmp(const unsigned char *, const unsigned char *, size_t);
55*d794ecc9SDavid van Moolenbroek static void grep_revstr(unsigned char *, int);
56*d794ecc9SDavid van Moolenbroek #endif
57*d794ecc9SDavid van Moolenbroek
58*d794ecc9SDavid van Moolenbroek int
grep_tree(char ** argv)59*d794ecc9SDavid van Moolenbroek grep_tree(char **argv)
60*d794ecc9SDavid van Moolenbroek {
61*d794ecc9SDavid van Moolenbroek FTS *fts;
62*d794ecc9SDavid van Moolenbroek FTSENT *p;
63*d794ecc9SDavid van Moolenbroek int c, fts_flags;
64*d794ecc9SDavid van Moolenbroek
65*d794ecc9SDavid van Moolenbroek c = 0;
66*d794ecc9SDavid van Moolenbroek
67*d794ecc9SDavid van Moolenbroek fts_flags = FTS_PHYSICAL | FTS_NOSTAT | FTS_NOCHDIR;
68*d794ecc9SDavid van Moolenbroek
69*d794ecc9SDavid van Moolenbroek if (!(fts = fts_open(argv, fts_flags, NULL)))
70*d794ecc9SDavid van Moolenbroek err(2, NULL);
71*d794ecc9SDavid van Moolenbroek while ((p = fts_read(fts)) != NULL) {
72*d794ecc9SDavid van Moolenbroek switch (p->fts_info) {
73*d794ecc9SDavid van Moolenbroek case FTS_DNR:
74*d794ecc9SDavid van Moolenbroek break;
75*d794ecc9SDavid van Moolenbroek case FTS_ERR:
76*d794ecc9SDavid van Moolenbroek file_err = 1;
77*d794ecc9SDavid van Moolenbroek if(!sflag) {
78*d794ecc9SDavid van Moolenbroek errno = p->fts_errno;
79*d794ecc9SDavid van Moolenbroek warn("%s", p->fts_path);
80*d794ecc9SDavid van Moolenbroek }
81*d794ecc9SDavid van Moolenbroek break;
82*d794ecc9SDavid van Moolenbroek case FTS_DP:
83*d794ecc9SDavid van Moolenbroek break;
84*d794ecc9SDavid van Moolenbroek default:
85*d794ecc9SDavid van Moolenbroek c += procfile(p->fts_path);
86*d794ecc9SDavid van Moolenbroek break;
87*d794ecc9SDavid van Moolenbroek }
88*d794ecc9SDavid van Moolenbroek }
89*d794ecc9SDavid van Moolenbroek if (errno)
90*d794ecc9SDavid van Moolenbroek err(2, "fts_read");
91*d794ecc9SDavid van Moolenbroek
92*d794ecc9SDavid van Moolenbroek return c;
93*d794ecc9SDavid van Moolenbroek }
94*d794ecc9SDavid van Moolenbroek
95*d794ecc9SDavid van Moolenbroek int
procfile(const char * fn)96*d794ecc9SDavid van Moolenbroek procfile(const char *fn)
97*d794ecc9SDavid van Moolenbroek {
98*d794ecc9SDavid van Moolenbroek str_t ln;
99*d794ecc9SDavid van Moolenbroek file_t *f;
100*d794ecc9SDavid van Moolenbroek int c, t, z, nottext;
101*d794ecc9SDavid van Moolenbroek
102*d794ecc9SDavid van Moolenbroek if (fn == NULL) {
103*d794ecc9SDavid van Moolenbroek fn = "(standard input)";
104*d794ecc9SDavid van Moolenbroek f = grep_fdopen(STDIN_FILENO, "r");
105*d794ecc9SDavid van Moolenbroek } else {
106*d794ecc9SDavid van Moolenbroek f = grep_open(fn, "r");
107*d794ecc9SDavid van Moolenbroek }
108*d794ecc9SDavid van Moolenbroek if (f == NULL) {
109*d794ecc9SDavid van Moolenbroek file_err = 1;
110*d794ecc9SDavid van Moolenbroek if (!sflag)
111*d794ecc9SDavid van Moolenbroek warn("%s", fn);
112*d794ecc9SDavid van Moolenbroek return 0;
113*d794ecc9SDavid van Moolenbroek }
114*d794ecc9SDavid van Moolenbroek
115*d794ecc9SDavid van Moolenbroek nottext = grep_bin_file(f);
116*d794ecc9SDavid van Moolenbroek if (nottext && binbehave == BIN_FILE_SKIP) {
117*d794ecc9SDavid van Moolenbroek grep_close(f);
118*d794ecc9SDavid van Moolenbroek return 0;
119*d794ecc9SDavid van Moolenbroek }
120*d794ecc9SDavid van Moolenbroek
121*d794ecc9SDavid van Moolenbroek ln.file = fn;
122*d794ecc9SDavid van Moolenbroek ln.line_no = 0;
123*d794ecc9SDavid van Moolenbroek ln.len = 0;
124*d794ecc9SDavid van Moolenbroek linesqueued = 0;
125*d794ecc9SDavid van Moolenbroek tail = 0;
126*d794ecc9SDavid van Moolenbroek ln.off = -1;
127*d794ecc9SDavid van Moolenbroek
128*d794ecc9SDavid van Moolenbroek if (Bflag > 0)
129*d794ecc9SDavid van Moolenbroek initqueue();
130*d794ecc9SDavid van Moolenbroek for (c = 0; c == 0 || !(lflag || qflag); ) {
131*d794ecc9SDavid van Moolenbroek ln.off += ln.len + 1;
132*d794ecc9SDavid van Moolenbroek if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL)
133*d794ecc9SDavid van Moolenbroek break;
134*d794ecc9SDavid van Moolenbroek if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
135*d794ecc9SDavid van Moolenbroek --ln.len;
136*d794ecc9SDavid van Moolenbroek ln.line_no++;
137*d794ecc9SDavid van Moolenbroek
138*d794ecc9SDavid van Moolenbroek z = tail;
139*d794ecc9SDavid van Moolenbroek
140*d794ecc9SDavid van Moolenbroek if ((t = procline(&ln, nottext)) == 0 && Bflag > 0 && z == 0) {
141*d794ecc9SDavid van Moolenbroek enqueue(&ln);
142*d794ecc9SDavid van Moolenbroek linesqueued++;
143*d794ecc9SDavid van Moolenbroek }
144*d794ecc9SDavid van Moolenbroek c += t;
145*d794ecc9SDavid van Moolenbroek }
146*d794ecc9SDavid van Moolenbroek if (Bflag > 0)
147*d794ecc9SDavid van Moolenbroek clearqueue();
148*d794ecc9SDavid van Moolenbroek grep_close(f);
149*d794ecc9SDavid van Moolenbroek
150*d794ecc9SDavid van Moolenbroek if (cflag) {
151*d794ecc9SDavid van Moolenbroek if (!hflag)
152*d794ecc9SDavid van Moolenbroek printf("%s:", ln.file);
153*d794ecc9SDavid van Moolenbroek printf("%u\n", c);
154*d794ecc9SDavid van Moolenbroek }
155*d794ecc9SDavid van Moolenbroek if (lflag && c != 0)
156*d794ecc9SDavid van Moolenbroek printf("%s\n", fn);
157*d794ecc9SDavid van Moolenbroek if (Lflag && c == 0)
158*d794ecc9SDavid van Moolenbroek printf("%s\n", fn);
159*d794ecc9SDavid van Moolenbroek if (c && !cflag && !lflag && !Lflag &&
160*d794ecc9SDavid van Moolenbroek binbehave == BIN_FILE_BIN && nottext && !qflag)
161*d794ecc9SDavid van Moolenbroek printf("Binary file %s matches\n", fn);
162*d794ecc9SDavid van Moolenbroek
163*d794ecc9SDavid van Moolenbroek return c;
164*d794ecc9SDavid van Moolenbroek }
165*d794ecc9SDavid van Moolenbroek
166*d794ecc9SDavid van Moolenbroek
167*d794ecc9SDavid van Moolenbroek /*
168*d794ecc9SDavid van Moolenbroek * Process an individual line in a file. Return non-zero if it matches.
169*d794ecc9SDavid van Moolenbroek */
170*d794ecc9SDavid van Moolenbroek
171*d794ecc9SDavid van Moolenbroek #define isword(x) (isalnum((unsigned char)x) || (x) == '_')
172*d794ecc9SDavid van Moolenbroek
173*d794ecc9SDavid van Moolenbroek static int
procline(str_t * l,int nottext)174*d794ecc9SDavid van Moolenbroek procline(str_t *l, int nottext)
175*d794ecc9SDavid van Moolenbroek {
176*d794ecc9SDavid van Moolenbroek regmatch_t pmatch;
177*d794ecc9SDavid van Moolenbroek int c, i, r;
178*d794ecc9SDavid van Moolenbroek regoff_t offset;
179*d794ecc9SDavid van Moolenbroek
180*d794ecc9SDavid van Moolenbroek /* size_t will be converted to regoff_t. ssize_t is guaranteed to fit
181*d794ecc9SDavid van Moolenbroek * into regoff_t */
182*d794ecc9SDavid van Moolenbroek if (l->len > SSIZE_MAX) {
183*d794ecc9SDavid van Moolenbroek errx(2, "Line is too big to process");
184*d794ecc9SDavid van Moolenbroek }
185*d794ecc9SDavid van Moolenbroek
186*d794ecc9SDavid van Moolenbroek c = 0;
187*d794ecc9SDavid van Moolenbroek i = 0;
188*d794ecc9SDavid van Moolenbroek if (matchall) {
189*d794ecc9SDavid van Moolenbroek c = 1;
190*d794ecc9SDavid van Moolenbroek goto print;
191*d794ecc9SDavid van Moolenbroek }
192*d794ecc9SDavid van Moolenbroek
193*d794ecc9SDavid van Moolenbroek for (i = 0; i < patterns; i++) {
194*d794ecc9SDavid van Moolenbroek offset = 0;
195*d794ecc9SDavid van Moolenbroek redo:
196*d794ecc9SDavid van Moolenbroek if (fg_pattern[i].pattern) {
197*d794ecc9SDavid van Moolenbroek r = grep_search(&fg_pattern[i],
198*d794ecc9SDavid van Moolenbroek (unsigned char *)l->dat + offset, l->len - offset,
199*d794ecc9SDavid van Moolenbroek &pmatch);
200*d794ecc9SDavid van Moolenbroek pmatch.rm_so += offset;
201*d794ecc9SDavid van Moolenbroek pmatch.rm_eo += offset;
202*d794ecc9SDavid van Moolenbroek } else {
203*d794ecc9SDavid van Moolenbroek pmatch.rm_so = offset;
204*d794ecc9SDavid van Moolenbroek pmatch.rm_eo = l->len;
205*d794ecc9SDavid van Moolenbroek r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags);
206*d794ecc9SDavid van Moolenbroek }
207*d794ecc9SDavid van Moolenbroek if (r == 0 && xflag) {
208*d794ecc9SDavid van Moolenbroek if (pmatch.rm_so != 0 || pmatch.rm_eo != l->len)
209*d794ecc9SDavid van Moolenbroek r = REG_NOMATCH;
210*d794ecc9SDavid van Moolenbroek }
211*d794ecc9SDavid van Moolenbroek if (r == 0) {
212*d794ecc9SDavid van Moolenbroek c = 1;
213*d794ecc9SDavid van Moolenbroek if (oflag && pmatch.rm_so != pmatch.rm_eo)
214*d794ecc9SDavid van Moolenbroek goto print;
215*d794ecc9SDavid van Moolenbroek break;
216*d794ecc9SDavid van Moolenbroek }
217*d794ecc9SDavid van Moolenbroek }
218*d794ecc9SDavid van Moolenbroek if (oflag)
219*d794ecc9SDavid van Moolenbroek return c;
220*d794ecc9SDavid van Moolenbroek print:
221*d794ecc9SDavid van Moolenbroek if (vflag)
222*d794ecc9SDavid van Moolenbroek c = !c;
223*d794ecc9SDavid van Moolenbroek
224*d794ecc9SDavid van Moolenbroek if (c && binbehave == BIN_FILE_BIN && nottext)
225*d794ecc9SDavid van Moolenbroek return c; /* Binary file */
226*d794ecc9SDavid van Moolenbroek
227*d794ecc9SDavid van Moolenbroek if ((tail > 0 || c) && !cflag && !qflag) {
228*d794ecc9SDavid van Moolenbroek if (c) {
229*d794ecc9SDavid van Moolenbroek if (first > 0 && tail == 0 && (Bflag < linesqueued) &&
230*d794ecc9SDavid van Moolenbroek (Aflag || Bflag))
231*d794ecc9SDavid van Moolenbroek printf("--\n");
232*d794ecc9SDavid van Moolenbroek first = 1;
233*d794ecc9SDavid van Moolenbroek tail = Aflag;
234*d794ecc9SDavid van Moolenbroek if (Bflag > 0)
235*d794ecc9SDavid van Moolenbroek printqueue();
236*d794ecc9SDavid van Moolenbroek linesqueued = 0;
237*d794ecc9SDavid van Moolenbroek printline(l, ':', oflag ? &pmatch : NULL);
238*d794ecc9SDavid van Moolenbroek } else {
239*d794ecc9SDavid van Moolenbroek printline(l, '-', oflag ? &pmatch : NULL);
240*d794ecc9SDavid van Moolenbroek tail--;
241*d794ecc9SDavid van Moolenbroek }
242*d794ecc9SDavid van Moolenbroek }
243*d794ecc9SDavid van Moolenbroek if (oflag && !matchall) {
244*d794ecc9SDavid van Moolenbroek offset = pmatch.rm_eo;
245*d794ecc9SDavid van Moolenbroek goto redo;
246*d794ecc9SDavid van Moolenbroek }
247*d794ecc9SDavid van Moolenbroek return c;
248*d794ecc9SDavid van Moolenbroek }
249*d794ecc9SDavid van Moolenbroek
250*d794ecc9SDavid van Moolenbroek #ifndef SMALL
251*d794ecc9SDavid van Moolenbroek void
fgrepcomp(fastgrep_t * fg,const unsigned char * pat)252*d794ecc9SDavid van Moolenbroek fgrepcomp(fastgrep_t *fg, const unsigned char *pat)
253*d794ecc9SDavid van Moolenbroek {
254*d794ecc9SDavid van Moolenbroek int i;
255*d794ecc9SDavid van Moolenbroek
256*d794ecc9SDavid van Moolenbroek /* Initialize. */
257*d794ecc9SDavid van Moolenbroek fg->patternLen = strlen((const char *)pat);
258*d794ecc9SDavid van Moolenbroek fg->bol = 0;
259*d794ecc9SDavid van Moolenbroek fg->eol = 0;
260*d794ecc9SDavid van Moolenbroek fg->wmatch = wflag;
261*d794ecc9SDavid van Moolenbroek fg->reversedSearch = 0;
262*d794ecc9SDavid van Moolenbroek
263*d794ecc9SDavid van Moolenbroek /*
264*d794ecc9SDavid van Moolenbroek * Make a copy and upper case it for later if in -i mode,
265*d794ecc9SDavid van Moolenbroek * else just copy the pointer.
266*d794ecc9SDavid van Moolenbroek */
267*d794ecc9SDavid van Moolenbroek if (iflag) {
268*d794ecc9SDavid van Moolenbroek fg->pattern = grep_malloc(fg->patternLen + 1);
269*d794ecc9SDavid van Moolenbroek for (i = 0; i < fg->patternLen; i++)
270*d794ecc9SDavid van Moolenbroek fg->pattern[i] = toupper(pat[i]);
271*d794ecc9SDavid van Moolenbroek fg->pattern[fg->patternLen] = '\0';
272*d794ecc9SDavid van Moolenbroek } else
273*d794ecc9SDavid van Moolenbroek fg->pattern = __UNCONST(pat); /* really const */
274*d794ecc9SDavid van Moolenbroek
275*d794ecc9SDavid van Moolenbroek /* Preprocess pattern. */
276*d794ecc9SDavid van Moolenbroek for (i = 0; i <= UCHAR_MAX; i++)
277*d794ecc9SDavid van Moolenbroek fg->qsBc[i] = fg->patternLen;
278*d794ecc9SDavid van Moolenbroek for (i = 1; i < fg->patternLen; i++) {
279*d794ecc9SDavid van Moolenbroek fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
280*d794ecc9SDavid van Moolenbroek /*
281*d794ecc9SDavid van Moolenbroek * If case is ignored, make the jump apply to both upper and
282*d794ecc9SDavid van Moolenbroek * lower cased characters. As the pattern is stored in upper
283*d794ecc9SDavid van Moolenbroek * case, apply the same to the lower case equivalents.
284*d794ecc9SDavid van Moolenbroek */
285*d794ecc9SDavid van Moolenbroek if (iflag)
286*d794ecc9SDavid van Moolenbroek fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
287*d794ecc9SDavid van Moolenbroek }
288*d794ecc9SDavid van Moolenbroek }
289*d794ecc9SDavid van Moolenbroek #endif
290*d794ecc9SDavid van Moolenbroek
291*d794ecc9SDavid van Moolenbroek /*
292*d794ecc9SDavid van Moolenbroek * Returns: -1 on failure, 0 on success
293*d794ecc9SDavid van Moolenbroek */
294*d794ecc9SDavid van Moolenbroek int
fastcomp(fastgrep_t * fg,const char * pat)295*d794ecc9SDavid van Moolenbroek fastcomp(fastgrep_t *fg, const char *pat)
296*d794ecc9SDavid van Moolenbroek {
297*d794ecc9SDavid van Moolenbroek #ifdef SMALL
298*d794ecc9SDavid van Moolenbroek return -1;
299*d794ecc9SDavid van Moolenbroek #else
300*d794ecc9SDavid van Moolenbroek int i;
301*d794ecc9SDavid van Moolenbroek int bol = 0;
302*d794ecc9SDavid van Moolenbroek int eol = 0;
303*d794ecc9SDavid van Moolenbroek int shiftPatternLen;
304*d794ecc9SDavid van Moolenbroek int hasDot = 0;
305*d794ecc9SDavid van Moolenbroek int firstHalfDot = -1;
306*d794ecc9SDavid van Moolenbroek int firstLastHalfDot = -1;
307*d794ecc9SDavid van Moolenbroek int lastHalfDot = 0;
308*d794ecc9SDavid van Moolenbroek
309*d794ecc9SDavid van Moolenbroek /* Initialize. */
310*d794ecc9SDavid van Moolenbroek fg->patternLen = strlen(pat);
311*d794ecc9SDavid van Moolenbroek fg->bol = 0;
312*d794ecc9SDavid van Moolenbroek fg->eol = 0;
313*d794ecc9SDavid van Moolenbroek fg->wmatch = 0;
314*d794ecc9SDavid van Moolenbroek fg->reversedSearch = 0;
315*d794ecc9SDavid van Moolenbroek
316*d794ecc9SDavid van Moolenbroek /* Remove end-of-line character ('$'). */
317*d794ecc9SDavid van Moolenbroek if (fg->patternLen > 0 && pat[fg->patternLen - 1] == '$') {
318*d794ecc9SDavid van Moolenbroek eol++;
319*d794ecc9SDavid van Moolenbroek fg->eol = 1;
320*d794ecc9SDavid van Moolenbroek fg->patternLen--;
321*d794ecc9SDavid van Moolenbroek }
322*d794ecc9SDavid van Moolenbroek
323*d794ecc9SDavid van Moolenbroek /* Remove beginning-of-line character ('^'). */
324*d794ecc9SDavid van Moolenbroek if (pat[0] == '^') {
325*d794ecc9SDavid van Moolenbroek bol++;
326*d794ecc9SDavid van Moolenbroek fg->bol = 1;
327*d794ecc9SDavid van Moolenbroek fg->patternLen--;
328*d794ecc9SDavid van Moolenbroek }
329*d794ecc9SDavid van Moolenbroek
330*d794ecc9SDavid van Moolenbroek /* Remove enclosing [[:<:]] and [[:>:]] (word match). */
331*d794ecc9SDavid van Moolenbroek if (wflag) {
332*d794ecc9SDavid van Moolenbroek /* basic re's use \( \), extended re's ( ) */
333*d794ecc9SDavid van Moolenbroek int extra = Eflag ? 1 : 2;
334*d794ecc9SDavid van Moolenbroek fg->patternLen -= 14 + 2 * extra;
335*d794ecc9SDavid van Moolenbroek fg->wmatch = 7 + extra;
336*d794ecc9SDavid van Moolenbroek } else if (fg->patternLen >= 14 &&
337*d794ecc9SDavid van Moolenbroek strncmp(pat + fg->bol, "[[:<:]]", 7) == 0 &&
338*d794ecc9SDavid van Moolenbroek strncmp(pat + fg->bol + fg->patternLen - 7, "[[:>:]]", 7) == 0) {
339*d794ecc9SDavid van Moolenbroek fg->patternLen -= 14;
340*d794ecc9SDavid van Moolenbroek fg->wmatch = 7;
341*d794ecc9SDavid van Moolenbroek }
342*d794ecc9SDavid van Moolenbroek
343*d794ecc9SDavid van Moolenbroek /*
344*d794ecc9SDavid van Moolenbroek * Copy pattern minus '^' and '$' characters as well as word
345*d794ecc9SDavid van Moolenbroek * match character classes at the beginning and ending of the
346*d794ecc9SDavid van Moolenbroek * string respectively.
347*d794ecc9SDavid van Moolenbroek */
348*d794ecc9SDavid van Moolenbroek fg->pattern = grep_malloc(fg->patternLen + 1);
349*d794ecc9SDavid van Moolenbroek memcpy(fg->pattern, pat + bol + fg->wmatch, fg->patternLen);
350*d794ecc9SDavid van Moolenbroek fg->pattern[fg->patternLen] = '\0';
351*d794ecc9SDavid van Moolenbroek
352*d794ecc9SDavid van Moolenbroek /* Look for ways to cheat...er...avoid the full regex engine. */
353*d794ecc9SDavid van Moolenbroek for (i = 0; i < fg->patternLen; i++)
354*d794ecc9SDavid van Moolenbroek {
355*d794ecc9SDavid van Moolenbroek switch (fg->pattern[i]) {
356*d794ecc9SDavid van Moolenbroek case '.':
357*d794ecc9SDavid van Moolenbroek hasDot = i;
358*d794ecc9SDavid van Moolenbroek if (i < fg->patternLen / 2) {
359*d794ecc9SDavid van Moolenbroek if (firstHalfDot < 0)
360*d794ecc9SDavid van Moolenbroek /* Closest dot to the beginning */
361*d794ecc9SDavid van Moolenbroek firstHalfDot = i;
362*d794ecc9SDavid van Moolenbroek } else {
363*d794ecc9SDavid van Moolenbroek /* Closest dot to the end of the pattern. */
364*d794ecc9SDavid van Moolenbroek lastHalfDot = i;
365*d794ecc9SDavid van Moolenbroek if (firstLastHalfDot < 0)
366*d794ecc9SDavid van Moolenbroek firstLastHalfDot = i;
367*d794ecc9SDavid van Moolenbroek }
368*d794ecc9SDavid van Moolenbroek break;
369*d794ecc9SDavid van Moolenbroek case '(': case ')':
370*d794ecc9SDavid van Moolenbroek case '{': case '}':
371*d794ecc9SDavid van Moolenbroek /* Special in BRE if preceded by '\\' */
372*d794ecc9SDavid van Moolenbroek case '?':
373*d794ecc9SDavid van Moolenbroek case '+':
374*d794ecc9SDavid van Moolenbroek case '|':
375*d794ecc9SDavid van Moolenbroek /* Not special in BRE. */
376*d794ecc9SDavid van Moolenbroek if (!Eflag)
377*d794ecc9SDavid van Moolenbroek goto nonspecial;
378*d794ecc9SDavid van Moolenbroek case '\\':
379*d794ecc9SDavid van Moolenbroek case '*':
380*d794ecc9SDavid van Moolenbroek case '[': case ']':
381*d794ecc9SDavid van Moolenbroek /* Free memory and let others know this is empty. */
382*d794ecc9SDavid van Moolenbroek free(fg->pattern);
383*d794ecc9SDavid van Moolenbroek fg->pattern = NULL;
384*d794ecc9SDavid van Moolenbroek return (-1);
385*d794ecc9SDavid van Moolenbroek default:
386*d794ecc9SDavid van Moolenbroek nonspecial:
387*d794ecc9SDavid van Moolenbroek if (iflag)
388*d794ecc9SDavid van Moolenbroek fg->pattern[i] = toupper(fg->pattern[i]);
389*d794ecc9SDavid van Moolenbroek break;
390*d794ecc9SDavid van Moolenbroek }
391*d794ecc9SDavid van Moolenbroek }
392*d794ecc9SDavid van Moolenbroek
393*d794ecc9SDavid van Moolenbroek /*
394*d794ecc9SDavid van Moolenbroek * Determine if a reverse search would be faster based on the placement
395*d794ecc9SDavid van Moolenbroek * of the dots.
396*d794ecc9SDavid van Moolenbroek */
397*d794ecc9SDavid van Moolenbroek if ((!(lflag || cflag)) && ((!(bol || eol)) &&
398*d794ecc9SDavid van Moolenbroek ((lastHalfDot) && ((firstHalfDot < 0) ||
399*d794ecc9SDavid van Moolenbroek ((fg->patternLen - (lastHalfDot + 1)) < firstHalfDot))))) {
400*d794ecc9SDavid van Moolenbroek fg->reversedSearch = 1;
401*d794ecc9SDavid van Moolenbroek hasDot = fg->patternLen - (firstHalfDot < 0 ?
402*d794ecc9SDavid van Moolenbroek firstLastHalfDot : firstHalfDot) - 1;
403*d794ecc9SDavid van Moolenbroek grep_revstr(fg->pattern, fg->patternLen);
404*d794ecc9SDavid van Moolenbroek }
405*d794ecc9SDavid van Moolenbroek
406*d794ecc9SDavid van Moolenbroek /*
407*d794ecc9SDavid van Moolenbroek * Normal Quick Search would require a shift based on the position the
408*d794ecc9SDavid van Moolenbroek * next character after the comparison is within the pattern. With
409*d794ecc9SDavid van Moolenbroek * wildcards, the position of the last dot effects the maximum shift
410*d794ecc9SDavid van Moolenbroek * distance.
411*d794ecc9SDavid van Moolenbroek * The closer to the end the wild card is the slower the search. A
412*d794ecc9SDavid van Moolenbroek * reverse version of this algorithm would be useful for wildcards near
413*d794ecc9SDavid van Moolenbroek * the end of the string.
414*d794ecc9SDavid van Moolenbroek *
415*d794ecc9SDavid van Moolenbroek * Examples:
416*d794ecc9SDavid van Moolenbroek * Pattern Max shift
417*d794ecc9SDavid van Moolenbroek * ------- ---------
418*d794ecc9SDavid van Moolenbroek * this 5
419*d794ecc9SDavid van Moolenbroek * .his 4
420*d794ecc9SDavid van Moolenbroek * t.is 3
421*d794ecc9SDavid van Moolenbroek * th.s 2
422*d794ecc9SDavid van Moolenbroek * thi. 1
423*d794ecc9SDavid van Moolenbroek */
424*d794ecc9SDavid van Moolenbroek
425*d794ecc9SDavid van Moolenbroek /* Adjust the shift based on location of the last dot ('.'). */
426*d794ecc9SDavid van Moolenbroek shiftPatternLen = fg->patternLen - hasDot;
427*d794ecc9SDavid van Moolenbroek
428*d794ecc9SDavid van Moolenbroek /* Preprocess pattern. */
429*d794ecc9SDavid van Moolenbroek for (i = 0; i <= UCHAR_MAX; i++)
430*d794ecc9SDavid van Moolenbroek fg->qsBc[i] = shiftPatternLen;
431*d794ecc9SDavid van Moolenbroek for (i = hasDot + 1; i < fg->patternLen; i++) {
432*d794ecc9SDavid van Moolenbroek fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
433*d794ecc9SDavid van Moolenbroek /*
434*d794ecc9SDavid van Moolenbroek * If case is ignored, make the jump apply to both upper and
435*d794ecc9SDavid van Moolenbroek * lower cased characters. As the pattern is stored in upper
436*d794ecc9SDavid van Moolenbroek * case, apply the same to the lower case equivalents.
437*d794ecc9SDavid van Moolenbroek */
438*d794ecc9SDavid van Moolenbroek if (iflag)
439*d794ecc9SDavid van Moolenbroek fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
440*d794ecc9SDavid van Moolenbroek }
441*d794ecc9SDavid van Moolenbroek
442*d794ecc9SDavid van Moolenbroek /*
443*d794ecc9SDavid van Moolenbroek * Put pattern back to normal after pre-processing to allow for easy
444*d794ecc9SDavid van Moolenbroek * comparisons later.
445*d794ecc9SDavid van Moolenbroek */
446*d794ecc9SDavid van Moolenbroek if (fg->reversedSearch)
447*d794ecc9SDavid van Moolenbroek grep_revstr(fg->pattern, fg->patternLen);
448*d794ecc9SDavid van Moolenbroek
449*d794ecc9SDavid van Moolenbroek return (0);
450*d794ecc9SDavid van Moolenbroek #endif
451*d794ecc9SDavid van Moolenbroek }
452*d794ecc9SDavid van Moolenbroek
453*d794ecc9SDavid van Moolenbroek /*
454*d794ecc9SDavid van Moolenbroek * Word boundaries using regular expressions are defined as the point
455*d794ecc9SDavid van Moolenbroek * of transition from a non-word char to a word char, or vice versa.
456*d794ecc9SDavid van Moolenbroek * This means that grep -w +a and grep -w a+ never match anything,
457*d794ecc9SDavid van Moolenbroek * because they lack a starting or ending transition, but grep -w a+b
458*d794ecc9SDavid van Moolenbroek * does match a line containing a+b.
459*d794ecc9SDavid van Moolenbroek */
460*d794ecc9SDavid van Moolenbroek #define wmatch(d, l, s, e) \
461*d794ecc9SDavid van Moolenbroek ((s == 0 || !isword(d[s-1])) && (e == l || !isword(d[e])) && \
462*d794ecc9SDavid van Moolenbroek e > s && isword(d[s]) && isword(d[e-1]))
463*d794ecc9SDavid van Moolenbroek
464*d794ecc9SDavid van Moolenbroek static int
grep_search(fastgrep_t * fg,unsigned char * data,size_t dataLen,regmatch_t * pmatch)465*d794ecc9SDavid van Moolenbroek grep_search(fastgrep_t *fg, unsigned char *data, size_t dataLen,
466*d794ecc9SDavid van Moolenbroek regmatch_t *pmatch)
467*d794ecc9SDavid van Moolenbroek {
468*d794ecc9SDavid van Moolenbroek #ifdef SMALL
469*d794ecc9SDavid van Moolenbroek return 0;
470*d794ecc9SDavid van Moolenbroek #else
471*d794ecc9SDavid van Moolenbroek regoff_t j;
472*d794ecc9SDavid van Moolenbroek int rtrnVal = REG_NOMATCH;
473*d794ecc9SDavid van Moolenbroek
474*d794ecc9SDavid van Moolenbroek pmatch->rm_so = -1;
475*d794ecc9SDavid van Moolenbroek pmatch->rm_eo = -1;
476*d794ecc9SDavid van Moolenbroek
477*d794ecc9SDavid van Moolenbroek /* No point in going farther if we do not have enough data. */
478*d794ecc9SDavid van Moolenbroek if (dataLen < (size_t)fg->patternLen)
479*d794ecc9SDavid van Moolenbroek return (rtrnVal);
480*d794ecc9SDavid van Moolenbroek
481*d794ecc9SDavid van Moolenbroek /* Only try once at the beginning or ending of the line. */
482*d794ecc9SDavid van Moolenbroek if (fg->bol || fg->eol) {
483*d794ecc9SDavid van Moolenbroek /* Simple text comparison. */
484*d794ecc9SDavid van Moolenbroek /* Verify data is >= pattern length before searching on it. */
485*d794ecc9SDavid van Moolenbroek if (dataLen >= (size_t)fg->patternLen) {
486*d794ecc9SDavid van Moolenbroek /* Determine where in data to start search at. */
487*d794ecc9SDavid van Moolenbroek if (fg->eol)
488*d794ecc9SDavid van Moolenbroek j = dataLen - fg->patternLen;
489*d794ecc9SDavid van Moolenbroek else
490*d794ecc9SDavid van Moolenbroek j = 0;
491*d794ecc9SDavid van Moolenbroek if (!((fg->bol && fg->eol) &&
492*d794ecc9SDavid van Moolenbroek (dataLen != (size_t)fg->patternLen)))
493*d794ecc9SDavid van Moolenbroek if (grep_cmp(fg->pattern, data + j,
494*d794ecc9SDavid van Moolenbroek fg->patternLen) == -1) {
495*d794ecc9SDavid van Moolenbroek pmatch->rm_so = j;
496*d794ecc9SDavid van Moolenbroek pmatch->rm_eo = j + fg->patternLen;
497*d794ecc9SDavid van Moolenbroek if (!fg->wmatch || wmatch(data, dataLen,
498*d794ecc9SDavid van Moolenbroek pmatch->rm_so, pmatch->rm_eo))
499*d794ecc9SDavid van Moolenbroek rtrnVal = 0;
500*d794ecc9SDavid van Moolenbroek }
501*d794ecc9SDavid van Moolenbroek }
502*d794ecc9SDavid van Moolenbroek } else if (fg->reversedSearch) {
503*d794ecc9SDavid van Moolenbroek /* Quick Search algorithm. */
504*d794ecc9SDavid van Moolenbroek j = dataLen;
505*d794ecc9SDavid van Moolenbroek do {
506*d794ecc9SDavid van Moolenbroek if (grep_cmp(fg->pattern, data + j - fg->patternLen,
507*d794ecc9SDavid van Moolenbroek fg->patternLen) == -1) {
508*d794ecc9SDavid van Moolenbroek pmatch->rm_so = j - fg->patternLen;
509*d794ecc9SDavid van Moolenbroek pmatch->rm_eo = j;
510*d794ecc9SDavid van Moolenbroek if (!fg->wmatch || wmatch(data, dataLen,
511*d794ecc9SDavid van Moolenbroek pmatch->rm_so, pmatch->rm_eo)) {
512*d794ecc9SDavid van Moolenbroek rtrnVal = 0;
513*d794ecc9SDavid van Moolenbroek break;
514*d794ecc9SDavid van Moolenbroek }
515*d794ecc9SDavid van Moolenbroek }
516*d794ecc9SDavid van Moolenbroek /* Shift if within bounds, otherwise, we are done. */
517*d794ecc9SDavid van Moolenbroek if (j == fg->patternLen)
518*d794ecc9SDavid van Moolenbroek break;
519*d794ecc9SDavid van Moolenbroek j -= fg->qsBc[(unsigned char)data[j - fg->patternLen - 1]];
520*d794ecc9SDavid van Moolenbroek } while (j >= fg->patternLen);
521*d794ecc9SDavid van Moolenbroek } else {
522*d794ecc9SDavid van Moolenbroek /* Quick Search algorithm. */
523*d794ecc9SDavid van Moolenbroek j = 0;
524*d794ecc9SDavid van Moolenbroek do {
525*d794ecc9SDavid van Moolenbroek if (grep_cmp(fg->pattern, data + j, fg->patternLen) == -1) {
526*d794ecc9SDavid van Moolenbroek pmatch->rm_so = j;
527*d794ecc9SDavid van Moolenbroek pmatch->rm_eo = j + fg->patternLen;
528*d794ecc9SDavid van Moolenbroek if (fg->patternLen == 0 || !fg->wmatch ||
529*d794ecc9SDavid van Moolenbroek wmatch(data, dataLen, pmatch->rm_so,
530*d794ecc9SDavid van Moolenbroek pmatch->rm_eo)) {
531*d794ecc9SDavid van Moolenbroek rtrnVal = 0;
532*d794ecc9SDavid van Moolenbroek break;
533*d794ecc9SDavid van Moolenbroek }
534*d794ecc9SDavid van Moolenbroek }
535*d794ecc9SDavid van Moolenbroek
536*d794ecc9SDavid van Moolenbroek /* Shift if within bounds, otherwise, we are done. */
537*d794ecc9SDavid van Moolenbroek if (j + fg->patternLen == dataLen)
538*d794ecc9SDavid van Moolenbroek break;
539*d794ecc9SDavid van Moolenbroek else
540*d794ecc9SDavid van Moolenbroek j += fg->qsBc[(unsigned char)data[j + fg->patternLen]];
541*d794ecc9SDavid van Moolenbroek } while (j <= (dataLen - fg->patternLen));
542*d794ecc9SDavid van Moolenbroek }
543*d794ecc9SDavid van Moolenbroek
544*d794ecc9SDavid van Moolenbroek return (rtrnVal);
545*d794ecc9SDavid van Moolenbroek #endif
546*d794ecc9SDavid van Moolenbroek }
547*d794ecc9SDavid van Moolenbroek
548*d794ecc9SDavid van Moolenbroek
549*d794ecc9SDavid van Moolenbroek void *
grep_malloc(size_t size)550*d794ecc9SDavid van Moolenbroek grep_malloc(size_t size)
551*d794ecc9SDavid van Moolenbroek {
552*d794ecc9SDavid van Moolenbroek void *ptr;
553*d794ecc9SDavid van Moolenbroek
554*d794ecc9SDavid van Moolenbroek if ((ptr = malloc(size)) == NULL)
555*d794ecc9SDavid van Moolenbroek err(2, "malloc");
556*d794ecc9SDavid van Moolenbroek return ptr;
557*d794ecc9SDavid van Moolenbroek }
558*d794ecc9SDavid van Moolenbroek
559*d794ecc9SDavid van Moolenbroek void *
grep_calloc(size_t nmemb,size_t size)560*d794ecc9SDavid van Moolenbroek grep_calloc(size_t nmemb, size_t size)
561*d794ecc9SDavid van Moolenbroek {
562*d794ecc9SDavid van Moolenbroek void *ptr;
563*d794ecc9SDavid van Moolenbroek
564*d794ecc9SDavid van Moolenbroek if ((ptr = calloc(nmemb, size)) == NULL)
565*d794ecc9SDavid van Moolenbroek err(2, "calloc");
566*d794ecc9SDavid van Moolenbroek return ptr;
567*d794ecc9SDavid van Moolenbroek }
568*d794ecc9SDavid van Moolenbroek
569*d794ecc9SDavid van Moolenbroek void *
grep_realloc(void * ptr,size_t size)570*d794ecc9SDavid van Moolenbroek grep_realloc(void *ptr, size_t size)
571*d794ecc9SDavid van Moolenbroek {
572*d794ecc9SDavid van Moolenbroek if ((ptr = realloc(ptr, size)) == NULL)
573*d794ecc9SDavid van Moolenbroek err(2, "realloc");
574*d794ecc9SDavid van Moolenbroek return ptr;
575*d794ecc9SDavid van Moolenbroek }
576*d794ecc9SDavid van Moolenbroek
577*d794ecc9SDavid van Moolenbroek #ifndef SMALL
578*d794ecc9SDavid van Moolenbroek /*
579*d794ecc9SDavid van Moolenbroek * Returns: i >= 0 on failure (position that it failed)
580*d794ecc9SDavid van Moolenbroek * -1 on success
581*d794ecc9SDavid van Moolenbroek */
582*d794ecc9SDavid van Moolenbroek static int
grep_cmp(const unsigned char * pat,const unsigned char * data,size_t len)583*d794ecc9SDavid van Moolenbroek grep_cmp(const unsigned char *pat, const unsigned char *data, size_t len)
584*d794ecc9SDavid van Moolenbroek {
585*d794ecc9SDavid van Moolenbroek size_t i;
586*d794ecc9SDavid van Moolenbroek
587*d794ecc9SDavid van Moolenbroek for (i = 0; i < len; i++) {
588*d794ecc9SDavid van Moolenbroek if (((pat[i] == data[i]) || (!Fflag && pat[i] == '.'))
589*d794ecc9SDavid van Moolenbroek || (iflag && pat[i] == toupper(data[i])))
590*d794ecc9SDavid van Moolenbroek continue;
591*d794ecc9SDavid van Moolenbroek return (i);
592*d794ecc9SDavid van Moolenbroek }
593*d794ecc9SDavid van Moolenbroek
594*d794ecc9SDavid van Moolenbroek return (-1);
595*d794ecc9SDavid van Moolenbroek }
596*d794ecc9SDavid van Moolenbroek
597*d794ecc9SDavid van Moolenbroek static void
grep_revstr(unsigned char * str,int len)598*d794ecc9SDavid van Moolenbroek grep_revstr(unsigned char *str, int len)
599*d794ecc9SDavid van Moolenbroek {
600*d794ecc9SDavid van Moolenbroek int i;
601*d794ecc9SDavid van Moolenbroek char c;
602*d794ecc9SDavid van Moolenbroek
603*d794ecc9SDavid van Moolenbroek for (i = 0; i < len / 2; i++) {
604*d794ecc9SDavid van Moolenbroek c = str[i];
605*d794ecc9SDavid van Moolenbroek str[i] = str[len - i - 1];
606*d794ecc9SDavid van Moolenbroek str[len - i - 1] = c;
607*d794ecc9SDavid van Moolenbroek }
608*d794ecc9SDavid van Moolenbroek }
609*d794ecc9SDavid van Moolenbroek #endif
610*d794ecc9SDavid van Moolenbroek
611*d794ecc9SDavid van Moolenbroek void
printline(str_t * line,int sep,regmatch_t * pmatch)612*d794ecc9SDavid van Moolenbroek printline(str_t *line, int sep, regmatch_t *pmatch)
613*d794ecc9SDavid van Moolenbroek {
614*d794ecc9SDavid van Moolenbroek int n;
615*d794ecc9SDavid van Moolenbroek
616*d794ecc9SDavid van Moolenbroek n = 0;
617*d794ecc9SDavid van Moolenbroek if (!hflag) {
618*d794ecc9SDavid van Moolenbroek fputs(line->file, stdout);
619*d794ecc9SDavid van Moolenbroek ++n;
620*d794ecc9SDavid van Moolenbroek }
621*d794ecc9SDavid van Moolenbroek if (nflag) {
622*d794ecc9SDavid van Moolenbroek if (n)
623*d794ecc9SDavid van Moolenbroek putchar(sep);
624*d794ecc9SDavid van Moolenbroek printf("%d", line->line_no);
625*d794ecc9SDavid van Moolenbroek ++n;
626*d794ecc9SDavid van Moolenbroek }
627*d794ecc9SDavid van Moolenbroek if (bflag) {
628*d794ecc9SDavid van Moolenbroek if (n)
629*d794ecc9SDavid van Moolenbroek putchar(sep);
630*d794ecc9SDavid van Moolenbroek printf("%lld", (long long)line->off);
631*d794ecc9SDavid van Moolenbroek ++n;
632*d794ecc9SDavid van Moolenbroek }
633*d794ecc9SDavid van Moolenbroek if (n)
634*d794ecc9SDavid van Moolenbroek putchar(sep);
635*d794ecc9SDavid van Moolenbroek if (pmatch)
636*d794ecc9SDavid van Moolenbroek fwrite(line->dat + pmatch->rm_so,
637*d794ecc9SDavid van Moolenbroek pmatch->rm_eo - pmatch->rm_so, 1, stdout);
638*d794ecc9SDavid van Moolenbroek else
639*d794ecc9SDavid van Moolenbroek fwrite(line->dat, line->len, 1, stdout);
640*d794ecc9SDavid van Moolenbroek putchar('\n');
641*d794ecc9SDavid van Moolenbroek }
642