1 /* $OpenBSD: grep.c,v 1.59 2019/01/31 01:30:46 tedu Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/types.h> 30 #include <sys/stat.h> 31 #include <sys/queue.h> 32 33 #include <ctype.h> 34 #include <err.h> 35 #include <errno.h> 36 #include <getopt.h> 37 #include <regex.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <unistd.h> 42 43 #include "grep.h" 44 45 /* Flags passed to regcomp() and regexec() */ 46 int cflags; 47 int eflags = REG_STARTEND; 48 49 int matchall; /* shortcut */ 50 int patterns, pattern_sz; 51 char **pattern; 52 regex_t *r_pattern; 53 fastgrep_t *fg_pattern; 54 55 /* For regex errors */ 56 char re_error[RE_ERROR_BUF + 1]; 57 58 /* Command-line flags */ 59 int Aflag; /* -A x: print x lines trailing each match */ 60 int Bflag; /* -B x: print x lines leading each match */ 61 int Eflag; /* -E: interpret pattern as extended regexp */ 62 int Fflag; /* -F: interpret pattern as list of fixed strings */ 63 int Hflag; /* -H: always print filename header */ 64 int Lflag; /* -L: only show names of files with no matches */ 65 int Rflag; /* -R: recursively search directory trees */ 66 int Zflag; /* -Z: decompress input before processing */ 67 int bflag; /* -b: show block numbers for each match */ 68 int cflag; /* -c: only show a count of matching lines */ 69 int hflag; /* -h: don't print filename headers */ 70 int iflag; /* -i: ignore case */ 71 int lflag; /* -l: only show names of files with matches */ 72 int mflag; /* -m x: stop reading the files after x matches */ 73 long long mcount; /* count for -m */ 74 long long mlimit; /* requested value for -m */ 75 int nflag; /* -n: show line numbers in front of matching lines */ 76 int oflag; /* -o: print each match */ 77 int qflag; /* -q: quiet mode (don't output anything) */ 78 int sflag; /* -s: silent mode (ignore errors) */ 79 int vflag; /* -v: only show non-matching lines */ 80 int wflag; /* -w: pattern must start and end on word boundaries */ 81 int xflag; /* -x: pattern must match entire line */ 82 int lbflag; /* --line-buffered */ 83 84 int binbehave = BIN_FILE_BIN; 85 86 enum { 87 BIN_OPT = CHAR_MAX + 1, 88 HELP_OPT, 89 MMAP_OPT, 90 LINEBUF_OPT 91 }; 92 93 /* Housekeeping */ 94 int first; /* flag whether or not this is our first match */ 95 int tail; /* lines left to print */ 96 int file_err; /* file reading error */ 97 98 struct patfile { 99 const char *pf_file; 100 SLIST_ENTRY(patfile) pf_next; 101 }; 102 SLIST_HEAD(, patfile) patfilelh; 103 104 extern char *__progname; 105 106 static void 107 usage(void) 108 { 109 fprintf(stderr, 110 #ifdef NOZ 111 "usage: %s [-abcEFGHhIiLlnoqRsUVvwx] [-A num] [-B num] [-C[num]]" 112 #else 113 "usage: %s [-abcEFGHhIiLlnoqRsUVvwxZ] [-A num] [-B num] [-C[num]]" 114 #endif 115 " [-e pattern]\n" 116 "\t[-f file] [-m num] [--binary-files=value] [--context[=num]]\n" 117 "\t[--line-buffered] [--max-count=num] [pattern] [file ...]\n", 118 __progname); 119 exit(2); 120 } 121 122 #ifdef NOZ 123 static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilm:noqrsuvwxy"; 124 #else 125 static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilm:noqrsuvwxy"; 126 #endif 127 128 static const struct option long_options[] = 129 { 130 {"binary-files", required_argument, NULL, BIN_OPT}, 131 {"help", no_argument, NULL, HELP_OPT}, 132 {"mmap", no_argument, NULL, MMAP_OPT}, 133 {"line-buffered", no_argument, NULL, LINEBUF_OPT}, 134 {"after-context", required_argument, NULL, 'A'}, 135 {"before-context", required_argument, NULL, 'B'}, 136 {"context", optional_argument, NULL, 'C'}, 137 {"devices", required_argument, NULL, 'D'}, 138 {"extended-regexp", no_argument, NULL, 'E'}, 139 {"fixed-strings", no_argument, NULL, 'F'}, 140 {"basic-regexp", no_argument, NULL, 'G'}, 141 {"with-filename", no_argument, NULL, 'H'}, 142 {"binary", no_argument, NULL, 'U'}, 143 {"version", no_argument, NULL, 'V'}, 144 {"text", no_argument, NULL, 'a'}, 145 {"byte-offset", no_argument, NULL, 'b'}, 146 {"count", no_argument, NULL, 'c'}, 147 {"regexp", required_argument, NULL, 'e'}, 148 {"file", required_argument, NULL, 'f'}, 149 {"no-filename", no_argument, NULL, 'h'}, 150 {"ignore-case", no_argument, NULL, 'i'}, 151 {"files-without-match", no_argument, NULL, 'L'}, 152 {"files-with-matches", no_argument, NULL, 'l'}, 153 {"max-count", required_argument, NULL, 'm'}, 154 {"line-number", no_argument, NULL, 'n'}, 155 {"quiet", no_argument, NULL, 'q'}, 156 {"silent", no_argument, NULL, 'q'}, 157 {"recursive", no_argument, NULL, 'r'}, 158 {"no-messages", no_argument, NULL, 's'}, 159 {"revert-match", no_argument, NULL, 'v'}, 160 {"word-regexp", no_argument, NULL, 'w'}, 161 {"line-regexp", no_argument, NULL, 'x'}, 162 {"unix-byte-offsets", no_argument, NULL, 'u'}, 163 #ifndef NOZ 164 {"decompress", no_argument, NULL, 'Z'}, 165 #endif 166 {NULL, no_argument, NULL, 0} 167 }; 168 169 170 static void 171 add_pattern(char *pat, size_t len) 172 { 173 if (!xflag && (len == 0 || matchall)) { 174 matchall = 1; 175 return; 176 } 177 if (patterns == pattern_sz) { 178 pattern_sz *= 2; 179 pattern = grep_reallocarray(pattern, ++pattern_sz, sizeof(*pattern)); 180 } 181 if (len > 0 && pat[len - 1] == '\n') 182 --len; 183 /* pat may not be NUL-terminated */ 184 if (wflag && !Fflag) { 185 int bol = 0, eol = 0, extra; 186 if (pat[0] == '^') 187 bol = 1; 188 if (len > 0 && pat[len - 1] == '$') 189 eol = 1; 190 extra = Eflag ? 2 : 4; 191 pattern[patterns] = grep_malloc(len + 15 + extra); 192 snprintf(pattern[patterns], len + 15 + extra, 193 "%s[[:<:]]%s%.*s%s[[:>:]]%s", 194 bol ? "^" : "", 195 Eflag ? "(" : "\\(", 196 (int)len - bol - eol, pat + bol, 197 Eflag ? ")" : "\\)", 198 eol ? "$" : ""); 199 len += 14 + extra; 200 } else { 201 pattern[patterns] = grep_malloc(len + 1); 202 memcpy(pattern[patterns], pat, len); 203 pattern[patterns][len] = '\0'; 204 } 205 ++patterns; 206 } 207 208 static void 209 add_patterns(char *pats) 210 { 211 char *nl; 212 213 while ((nl = strchr(pats, '\n')) != NULL) { 214 add_pattern(pats, nl - pats); 215 pats = nl + 1; 216 } 217 add_pattern(pats, strlen(pats)); 218 } 219 220 static void 221 read_patterns(const char *fn) 222 { 223 FILE *f; 224 char *line; 225 ssize_t len; 226 size_t linesize; 227 228 if ((f = fopen(fn, "r")) == NULL) 229 err(2, "%s", fn); 230 line = NULL; 231 linesize = 0; 232 while ((len = getline(&line, &linesize, f)) != -1) 233 add_pattern(line, *line == '\n' ? 0 : len); 234 if (ferror(f)) 235 err(2, "%s", fn); 236 fclose(f); 237 free(line); 238 } 239 240 int 241 main(int argc, char *argv[]) 242 { 243 int c, lastc, prevoptind, newarg, i, needpattern, exprs, expr_sz; 244 struct patfile *patfile, *pf_next; 245 long l; 246 char **expr; 247 const char *errstr; 248 249 if (pledge("stdio rpath", NULL) == -1) 250 err(2, "pledge"); 251 252 SLIST_INIT(&patfilelh); 253 switch (__progname[0]) { 254 case 'e': 255 Eflag = 1; 256 break; 257 case 'f': 258 Fflag = 1; 259 break; 260 #ifndef NOZ 261 case 'z': 262 Zflag = 1; 263 switch(__progname[1]) { 264 case 'e': 265 Eflag = 1; 266 break; 267 case 'f': 268 Fflag = 1; 269 break; 270 } 271 break; 272 #endif 273 } 274 275 lastc = '\0'; 276 newarg = 1; 277 prevoptind = 1; 278 needpattern = 1; 279 expr_sz = exprs = 0; 280 expr = NULL; 281 while ((c = getopt_long(argc, argv, optstr, 282 long_options, NULL)) != -1) { 283 switch (c) { 284 case '0': case '1': case '2': case '3': case '4': 285 case '5': case '6': case '7': case '8': case '9': 286 if (newarg || !isdigit(lastc)) 287 Aflag = 0; 288 else if (Aflag > INT_MAX / 10) 289 errx(2, "context out of range"); 290 Aflag = Bflag = (Aflag * 10) + (c - '0'); 291 break; 292 case 'A': 293 case 'B': 294 l = strtonum(optarg, 1, INT_MAX, &errstr); 295 if (errstr != NULL) 296 errx(2, "context %s", errstr); 297 if (c == 'A') 298 Aflag = (int)l; 299 else 300 Bflag = (int)l; 301 break; 302 case 'C': 303 if (optarg == NULL) 304 Aflag = Bflag = 2; 305 else { 306 l = strtonum(optarg, 1, INT_MAX, &errstr); 307 if (errstr != NULL) 308 errx(2, "context %s", errstr); 309 Aflag = Bflag = (int)l; 310 } 311 break; 312 case 'E': 313 Fflag = 0; 314 Eflag = 1; 315 break; 316 case 'F': 317 Eflag = 0; 318 Fflag = 1; 319 break; 320 case 'G': 321 Eflag = Fflag = 0; 322 break; 323 case 'H': 324 Hflag = 1; 325 break; 326 case 'I': 327 binbehave = BIN_FILE_SKIP; 328 break; 329 case 'L': 330 lflag = 0; 331 Lflag = qflag = 1; 332 break; 333 case 'R': 334 case 'r': 335 Rflag = 1; 336 break; 337 case 'U': 338 binbehave = BIN_FILE_BIN; 339 break; 340 case 'V': 341 fprintf(stderr, "grep version %u.%u\n", VER_MAJ, VER_MIN); 342 exit(0); 343 break; 344 #ifndef NOZ 345 case 'Z': 346 Zflag = 1; 347 break; 348 #endif 349 case 'a': 350 binbehave = BIN_FILE_TEXT; 351 break; 352 case 'b': 353 bflag = 1; 354 break; 355 case 'c': 356 cflag = 1; 357 break; 358 case 'e': 359 /* defer adding of expressions until all arguments are parsed */ 360 if (exprs == expr_sz) { 361 expr_sz *= 2; 362 expr = grep_reallocarray(expr, ++expr_sz, 363 sizeof(*expr)); 364 } 365 needpattern = 0; 366 expr[exprs] = optarg; 367 ++exprs; 368 break; 369 case 'f': 370 patfile = grep_malloc(sizeof(*patfile)); 371 patfile->pf_file = optarg; 372 SLIST_INSERT_HEAD(&patfilelh, patfile, pf_next); 373 needpattern = 0; 374 break; 375 case 'h': 376 hflag = 1; 377 break; 378 case 'i': 379 case 'y': 380 iflag = 1; 381 cflags |= REG_ICASE; 382 break; 383 case 'l': 384 Lflag = 0; 385 lflag = qflag = 1; 386 break; 387 case 'm': 388 mflag = 1; 389 mlimit = mcount = strtonum(optarg, 0, LLONG_MAX, 390 &errstr); 391 if (errstr != NULL) 392 errx(2, "invalid max-count %s: %s", 393 optarg, errstr); 394 break; 395 case 'n': 396 nflag = 1; 397 break; 398 case 'o': 399 oflag = 1; 400 break; 401 case 'q': 402 qflag = 1; 403 break; 404 case 's': 405 sflag = 1; 406 break; 407 case 'v': 408 vflag = 1; 409 break; 410 case 'w': 411 wflag = 1; 412 break; 413 case 'x': 414 xflag = 1; 415 break; 416 case BIN_OPT: 417 if (strcmp("binary", optarg) == 0) 418 binbehave = BIN_FILE_BIN; 419 else if (strcmp("without-match", optarg) == 0) 420 binbehave = BIN_FILE_SKIP; 421 else if (strcmp("text", optarg) == 0) 422 binbehave = BIN_FILE_TEXT; 423 else 424 errx(2, "Unknown binary-files option"); 425 break; 426 case 'u': 427 case MMAP_OPT: 428 /* default, compatibility */ 429 break; 430 case LINEBUF_OPT: 431 lbflag = 1; 432 break; 433 case HELP_OPT: 434 default: 435 usage(); 436 } 437 lastc = c; 438 newarg = optind != prevoptind; 439 prevoptind = optind; 440 } 441 argc -= optind; 442 argv += optind; 443 444 for (i = 0; i < exprs; i++) 445 add_patterns(expr[i]); 446 free(expr); 447 expr = NULL; 448 449 for (patfile = SLIST_FIRST(&patfilelh); patfile != NULL; 450 patfile = pf_next) { 451 pf_next = SLIST_NEXT(patfile, pf_next); 452 read_patterns(patfile->pf_file); 453 free(patfile); 454 } 455 456 if (argc == 0 && needpattern) 457 usage(); 458 459 if (argc != 0 && needpattern) { 460 add_patterns(*argv); 461 --argc; 462 ++argv; 463 } 464 465 if (Rflag && argc == 0) 466 warnx("warning: recursive search of stdin"); 467 if (Eflag) 468 cflags |= REG_EXTENDED; 469 if (Fflag) 470 cflags |= REG_NOSPEC; 471 #ifdef SMALL 472 /* Sorry, this won't work */ 473 if (Fflag && wflag) 474 errx(1, "Can't use small fgrep with -w"); 475 #endif 476 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); 477 r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); 478 for (i = 0; i < patterns; ++i) { 479 /* Check if cheating is allowed (always is for fgrep). */ 480 #ifndef SMALL 481 if (Fflag) { 482 fgrepcomp(&fg_pattern[i], pattern[i]); 483 } else 484 #endif 485 { 486 if (fastcomp(&fg_pattern[i], pattern[i])) { 487 /* Fall back to full regex library */ 488 c = regcomp(&r_pattern[i], pattern[i], cflags); 489 if (c != 0) { 490 regerror(c, &r_pattern[i], re_error, 491 RE_ERROR_BUF); 492 errx(2, "%s", re_error); 493 } 494 } 495 } 496 } 497 498 if (lbflag) 499 setvbuf(stdout, NULL, _IOLBF, 0); 500 501 if ((argc == 0 || argc == 1) && !Rflag && !Hflag) 502 hflag = 1; 503 504 if (argc == 0) 505 exit(!procfile(NULL)); 506 507 if (Rflag) 508 c = grep_tree(argv); 509 else 510 for (c = 0; argc--; ++argv) 511 c += procfile(*argv); 512 513 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1)); 514 } 515