1 /* $OpenBSD: grep.c,v 1.67 2022/06/26 18:48:10 op Exp $ */
2
3 /*-
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <sys/queue.h>
32
33 #include <ctype.h>
34 #include <err.h>
35 #include <errno.h>
36 #include <getopt.h>
37 #include <regex.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42
43 #include "grep.h"
44
45 /* Flags passed to regcomp() and regexec() */
46 int cflags;
47 int eflags = REG_STARTEND;
48
49 int matchall; /* shortcut */
50 int patterns, pattern_sz;
51 char **pattern;
52 regex_t *r_pattern;
53 fastgrep_t *fg_pattern;
54
55 /* For regex errors */
56 char re_error[RE_ERROR_BUF + 1];
57
/*
 * One global per command-line option.  All start out as zero/NULL and
 * are filled in by the option loop in main().
 */
int		 Aflag;		/* -A num: lines of context after a match */
int		 Bflag;		/* -B num: lines of context before a match */
int		 Eflag;		/* -E: patterns are extended regexps */
int		 Fflag;		/* -F: patterns are fixed strings */
int		 Hflag;		/* -H: always print the filename header */
int		 Lflag;		/* -L: list only files without a match */
int		 Rflag;		/* -R: descend into directory trees */
int		 Zflag;		/* -Z: decompress input before searching */
int		 bflag;		/* -b (--byte-offset): show offset of each match */
int		 cflag;		/* -c: print only a count of matching lines */
int		 hflag;		/* -h: never print filename headers */
int		 iflag;		/* -i: case-insensitive matching */
int		 lflag;		/* -l: list only files with a match */
int		 mflag;		/* -m num: stop reading a file after num matches */
long long	 mcount;	/* working counter for -m */
long long	 mlimit;	/* value given to -m */
int		 nflag;		/* -n: prefix matching lines with line numbers */
int		 oflag;		/* -o: print each match on its own */
int		 qflag;		/* -q: quiet, produce no output */
int		 sflag;		/* -s: silent, ignore errors */
int		 vflag;		/* -v: select non-matching lines */
int		 wflag;		/* -w: match must sit on word boundaries */
int		 xflag;		/* -x: match must cover the whole line */
int		 lbflag;	/* --line-buffered */
int		 nullflag;	/* --null */
const char	*labelname;	/* --label=name */
85
86 int binbehave = BIN_FILE_BIN;
87
/*
 * getopt_long() return codes for options that exist only in long form.
 * Numbering starts just above CHAR_MAX so that none of them can be
 * confused with a short-option character.
 */
enum {
	BIN_OPT = CHAR_MAX + 1,		/* --binary-files=value */
	HELP_OPT,			/* --help */
	MMAP_OPT,			/* --mmap */
	LINEBUF_OPT,			/* --line-buffered */
	NULL_OPT,			/* --null */
	LABEL_OPT			/* --label=name */
};
96
/* Run-time bookkeeping shared across the program. */
int		 first;		/* whether this is our first match */
int		 tail;		/* lines still left to print */
int		 file_err;	/* a file reading error occurred */
101
/*
 * One node per -f argument.  main() queues the file names on patfilelh
 * while parsing options and reads the files once parsing is finished.
 */
struct patfile {
	const char		*pf_file;	/* file name given to -f */
	SLIST_ENTRY(patfile)	 pf_next;	/* singly-linked list linkage */
};
SLIST_HEAD(, patfile)		 patfilelh;

/* Name the program was invoked under. */
extern char	*__progname;
109
110 static void
usage(void)111 usage(void)
112 {
113 fprintf(stderr,
114 #ifdef NOZ
115 "usage: %s [-abcEFGHhIiLlnoqRsUVvwx] [-A num] [-B num] [-C[num]]"
116 #else
117 "usage: %s [-abcEFGHhIiLlnoqRsUVvwxZ] [-A num] [-B num] [-C[num]]"
118 #endif
119 " [-e pattern]\n"
120 "\t[-f file] [-m num] [--binary-files=value] [--context[=num]]\n"
121 "\t[--label=name] [--line-buffered] [--null] [pattern]"
122 " [file ...]\n",
123 __progname);
124 exit(2);
125 }
126
/*
 * Short-option string for getopt_long().  The digits let "-NUM" be
 * collected one character at a time; 'Z' is accepted only when the
 * program is built without NOZ.
 */
static const char optstr[] =
    "0123456789A:B:CEFGHILRUV"
#ifndef NOZ
    "Z"
#endif
    "abce:f:hilm:noqrsuvwxy";
132
133 static const struct option long_options[] =
134 {
135 {"binary-files", required_argument, NULL, BIN_OPT},
136 {"help", no_argument, NULL, HELP_OPT},
137 {"mmap", no_argument, NULL, MMAP_OPT},
138 {"label", required_argument, NULL, LABEL_OPT},
139 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
140 {"null", no_argument, NULL, NULL_OPT},
141 {"after-context", required_argument, NULL, 'A'},
142 {"before-context", required_argument, NULL, 'B'},
143 {"context", optional_argument, NULL, 'C'},
144 {"devices", required_argument, NULL, 'D'},
145 {"extended-regexp", no_argument, NULL, 'E'},
146 {"fixed-strings", no_argument, NULL, 'F'},
147 {"basic-regexp", no_argument, NULL, 'G'},
148 {"with-filename", no_argument, NULL, 'H'},
149 {"binary", no_argument, NULL, 'U'},
150 {"version", no_argument, NULL, 'V'},
151 {"text", no_argument, NULL, 'a'},
152 {"byte-offset", no_argument, NULL, 'b'},
153 {"count", no_argument, NULL, 'c'},
154 {"regexp", required_argument, NULL, 'e'},
155 {"file", required_argument, NULL, 'f'},
156 {"no-filename", no_argument, NULL, 'h'},
157 {"ignore-case", no_argument, NULL, 'i'},
158 {"files-without-match", no_argument, NULL, 'L'},
159 {"files-with-matches", no_argument, NULL, 'l'},
160 {"max-count", required_argument, NULL, 'm'},
161 {"line-number", no_argument, NULL, 'n'},
162 {"quiet", no_argument, NULL, 'q'},
163 {"silent", no_argument, NULL, 'q'},
164 {"recursive", no_argument, NULL, 'r'},
165 {"no-messages", no_argument, NULL, 's'},
166 {"revert-match", no_argument, NULL, 'v'},
167 {"word-regexp", no_argument, NULL, 'w'},
168 {"line-regexp", no_argument, NULL, 'x'},
169 {"unix-byte-offsets", no_argument, NULL, 'u'},
170 #ifndef NOZ
171 {"decompress", no_argument, NULL, 'Z'},
172 #endif
173 {NULL, no_argument, NULL, 0}
174 };
175
176
177 static void
add_pattern(char * pat,size_t len)178 add_pattern(char *pat, size_t len)
179 {
180 if (!xflag && (len == 0 || matchall)) {
181 matchall = 1;
182 return;
183 }
184 if (patterns == pattern_sz) {
185 pattern_sz *= 2;
186 pattern = grep_reallocarray(pattern, ++pattern_sz, sizeof(*pattern));
187 }
188 if (len > 0 && pat[len - 1] == '\n')
189 --len;
190 /* pat may not be NUL-terminated */
191 if (wflag && !Fflag) {
192 int bol = 0, eol = 0, extra;
193 if (pat[0] == '^')
194 bol = 1;
195 if (len > 0 && pat[len - 1] == '$')
196 eol = 1;
197 extra = Eflag ? 2 : 4;
198 pattern[patterns] = grep_malloc(len + 15 + extra);
199 snprintf(pattern[patterns], len + 15 + extra,
200 "%s[[:<:]]%s%.*s%s[[:>:]]%s",
201 bol ? "^" : "",
202 Eflag ? "(" : "\\(",
203 (int)len - bol - eol, pat + bol,
204 Eflag ? ")" : "\\)",
205 eol ? "$" : "");
206 len += 14 + extra;
207 } else {
208 pattern[patterns] = grep_malloc(len + 1);
209 memcpy(pattern[patterns], pat, len);
210 pattern[patterns][len] = '\0';
211 }
212 ++patterns;
213 }
214
/*
 * Split pats at every newline and pass each piece to add_pattern().
 * The text after the last newline (possibly empty) is passed as the
 * final piece.  pats must be NUL-terminated; it is not modified.
 */
static void
add_patterns(char *pats)
{
	char *eol;

	for (eol = strchr(pats, '\n'); eol != NULL;
	    eol = strchr(pats, '\n')) {
		add_pattern(pats, eol - pats);
		pats = eol + 1;
	}
	add_pattern(pats, strlen(pats));
}
226
/*
 * Read the file named fn and hand every line to add_pattern().
 * A line consisting only of a newline is passed with length 0; any
 * other line is passed with the length getline() reported.
 * Failure to open or to read the file ends the program with status 2.
 */
static void
read_patterns(const char *fn)
{
	char *buf = NULL;
	size_t cap = 0;
	ssize_t n;
	FILE *fp;

	fp = fopen(fn, "r");
	if (fp == NULL)
		err(2, "%s", fn);

	for (;;) {
		n = getline(&buf, &cap, fp);
		if (n == -1)
			break;
		if (*buf == '\n')
			add_pattern(buf, 0);
		else
			add_pattern(buf, n);
	}
	/* getline() returns -1 for EOF and for errors; tell them apart. */
	if (ferror(fp))
		err(2, "%s", fn);

	fclose(fp);
	free(buf);
}
246
247 int
main(int argc,char * argv[])248 main(int argc, char *argv[])
249 {
250 int c, lastc, prevoptind, newarg, i, needpattern, exprs, expr_sz;
251 struct patfile *patfile, *pf_next;
252 long l;
253 char **expr;
254 const char *errstr;
255
256 if (pledge("stdio rpath", NULL) == -1)
257 err(2, "pledge");
258
259 SLIST_INIT(&patfilelh);
260 switch (__progname[0]) {
261 case 'e':
262 Eflag = 1;
263 break;
264 case 'f':
265 Fflag = 1;
266 break;
267 #ifndef NOZ
268 case 'z':
269 Zflag = 1;
270 switch(__progname[1]) {
271 case 'e':
272 Eflag = 1;
273 break;
274 case 'f':
275 Fflag = 1;
276 break;
277 }
278 break;
279 #endif
280 }
281
282 lastc = '\0';
283 newarg = 1;
284 prevoptind = 1;
285 needpattern = 1;
286 expr_sz = exprs = 0;
287 expr = NULL;
288 while ((c = getopt_long(argc, argv, optstr,
289 long_options, NULL)) != -1) {
290 switch (c) {
291 case '0': case '1': case '2': case '3': case '4':
292 case '5': case '6': case '7': case '8': case '9':
293 if (newarg || !isdigit(lastc))
294 Aflag = 0;
295 else if (Aflag > INT_MAX / 10)
296 errx(2, "context out of range");
297 Aflag = Bflag = (Aflag * 10) + (c - '0');
298 break;
299 case 'A':
300 case 'B':
301 l = strtonum(optarg, 1, INT_MAX, &errstr);
302 if (errstr != NULL)
303 errx(2, "context %s", errstr);
304 if (c == 'A')
305 Aflag = (int)l;
306 else
307 Bflag = (int)l;
308 break;
309 case 'C':
310 if (optarg == NULL)
311 Aflag = Bflag = 2;
312 else {
313 l = strtonum(optarg, 1, INT_MAX, &errstr);
314 if (errstr != NULL)
315 errx(2, "context %s", errstr);
316 Aflag = Bflag = (int)l;
317 }
318 break;
319 case 'E':
320 Fflag = 0;
321 Eflag = 1;
322 break;
323 case 'F':
324 Eflag = 0;
325 Fflag = 1;
326 break;
327 case 'G':
328 Eflag = Fflag = 0;
329 break;
330 case 'H':
331 Hflag = 1;
332 break;
333 case 'I':
334 binbehave = BIN_FILE_SKIP;
335 break;
336 case 'L':
337 lflag = 0;
338 Lflag = qflag = 1;
339 break;
340 case 'R':
341 case 'r':
342 Rflag = 1;
343 break;
344 case 'U':
345 binbehave = BIN_FILE_BIN;
346 break;
347 case 'V':
348 fprintf(stderr, "grep version %u.%u\n", VER_MAJ, VER_MIN);
349 exit(0);
350 break;
351 #ifndef NOZ
352 case 'Z':
353 Zflag = 1;
354 break;
355 #endif
356 case 'a':
357 binbehave = BIN_FILE_TEXT;
358 break;
359 case 'b':
360 bflag = 1;
361 break;
362 case 'c':
363 cflag = 1;
364 break;
365 case 'e':
366 /* defer adding of expressions until all arguments are parsed */
367 if (exprs == expr_sz) {
368 expr_sz *= 2;
369 expr = grep_reallocarray(expr, ++expr_sz,
370 sizeof(*expr));
371 }
372 needpattern = 0;
373 expr[exprs] = optarg;
374 ++exprs;
375 break;
376 case 'f':
377 patfile = grep_malloc(sizeof(*patfile));
378 patfile->pf_file = optarg;
379 SLIST_INSERT_HEAD(&patfilelh, patfile, pf_next);
380 needpattern = 0;
381 break;
382 case 'h':
383 hflag = 1;
384 break;
385 case 'i':
386 case 'y':
387 iflag = 1;
388 cflags |= REG_ICASE;
389 break;
390 case 'l':
391 Lflag = 0;
392 lflag = qflag = 1;
393 break;
394 case 'm':
395 mflag = 1;
396 mlimit = mcount = strtonum(optarg, 0, LLONG_MAX,
397 &errstr);
398 if (errstr != NULL)
399 errx(2, "invalid max-count %s: %s",
400 optarg, errstr);
401 break;
402 case 'n':
403 nflag = 1;
404 break;
405 case 'o':
406 oflag = 1;
407 break;
408 case 'q':
409 qflag = 1;
410 break;
411 case 's':
412 sflag = 1;
413 break;
414 case 'v':
415 vflag = 1;
416 break;
417 case 'w':
418 wflag = 1;
419 break;
420 case 'x':
421 xflag = 1;
422 break;
423 case BIN_OPT:
424 if (strcmp("binary", optarg) == 0)
425 binbehave = BIN_FILE_BIN;
426 else if (strcmp("without-match", optarg) == 0)
427 binbehave = BIN_FILE_SKIP;
428 else if (strcmp("text", optarg) == 0)
429 binbehave = BIN_FILE_TEXT;
430 else
431 errx(2, "Unknown binary-files option");
432 break;
433 case 'u':
434 case MMAP_OPT:
435 /* default, compatibility */
436 break;
437 case LABEL_OPT:
438 labelname = optarg;
439 break;
440 case LINEBUF_OPT:
441 lbflag = 1;
442 break;
443 case NULL_OPT:
444 nullflag = 1;
445 break;
446 case HELP_OPT:
447 default:
448 usage();
449 }
450 lastc = c;
451 newarg = optind != prevoptind;
452 prevoptind = optind;
453 }
454 argc -= optind;
455 argv += optind;
456
457 for (i = 0; i < exprs; i++)
458 add_patterns(expr[i]);
459 free(expr);
460 expr = NULL;
461
462 for (patfile = SLIST_FIRST(&patfilelh); patfile != NULL;
463 patfile = pf_next) {
464 pf_next = SLIST_NEXT(patfile, pf_next);
465 read_patterns(patfile->pf_file);
466 free(patfile);
467 }
468
469 if (argc == 0 && needpattern)
470 usage();
471
472 if (argc != 0 && needpattern) {
473 add_patterns(*argv);
474 --argc;
475 ++argv;
476 }
477 if (argc == 1 && strcmp(*argv, "-") == 0) {
478 /* stdin */
479 --argc;
480 ++argv;
481 }
482
483 if (Eflag)
484 cflags |= REG_EXTENDED;
485 if (Fflag)
486 cflags |= REG_NOSPEC;
487 #ifdef SMALL
488 /* Sorry, this won't work */
489 if (Fflag && wflag)
490 errx(1, "Can't use small fgrep with -w");
491 #endif
492 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
493 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
494 for (i = 0; i < patterns; ++i) {
495 /* Check if cheating is allowed (always is for fgrep). */
496 #ifndef SMALL
497 if (Fflag) {
498 fgrepcomp(&fg_pattern[i], pattern[i]);
499 } else
500 #endif
501 {
502 if (fastcomp(&fg_pattern[i], pattern[i])) {
503 /* Fall back to full regex library */
504 c = regcomp(&r_pattern[i], pattern[i], cflags);
505 if (c != 0) {
506 regerror(c, &r_pattern[i], re_error,
507 RE_ERROR_BUF);
508 errx(2, "%s", re_error);
509 }
510 }
511 }
512 }
513
514 if (lbflag)
515 setvbuf(stdout, NULL, _IOLBF, 0);
516
517 if ((argc == 0 || argc == 1) && !Rflag && !Hflag)
518 hflag = 1;
519
520 if (argc == 0 && !Rflag)
521 exit(!procfile(NULL));
522
523 if (Rflag)
524 c = grep_tree(argv);
525 else
526 for (c = 0; argc--; ++argv)
527 c |= procfile(*argv);
528
529 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));
530 }
531