1 /* $OpenBSD: grep.c,v 1.45 2012/12/29 01:32:44 millert Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/types.h> 30 #ifndef __minix 31 #include <sys/limits.h> 32 #else 33 #include <limits.h> 34 #endif /* __minix */ 35 #include <sys/stat.h> 36 #include <sys/queue.h> 37 38 #include <ctype.h> 39 #include <err.h> 40 #include <errno.h> 41 #include <getopt.h> 42 #include <regex.h> 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <string.h> 46 #include <unistd.h> 47 48 #include "grep.h" 49 50 /* Flags passed to regcomp() and regexec() */ 51 int cflags; 52 int eflags = REG_STARTEND; 53 54 int matchall; /* shortcut */ 55 int patterns, pattern_sz; 56 char **pattern; 57 regex_t *r_pattern; 58 fastgrep_t *fg_pattern; 59 60 /* For regex errors */ 61 char re_error[RE_ERROR_BUF + 1]; 62 63 /* Command-line flags */ 64 int Aflag; /* -A x: print x lines trailing each match */ 65 int Bflag; /* -B x: print x lines leading each match */ 66 int Eflag; /* -E: interpret pattern as extended regexp */ 67 int Fflag; /* -F: interpret pattern as list of fixed strings */ 68 int Gflag; /* -G: interpret pattern as basic regexp */ 69 int Hflag; /* -H: always print filename header */ 70 int Lflag; /* -L: only show names of files with no matches */ 71 int Rflag; /* -R: recursively search directory trees */ 72 #ifndef NOZ 73 int Zflag; /* -Z: decompress input before processing */ 74 #endif 75 int bflag; /* -b: show block numbers for each match */ 76 int cflag; /* -c: only show a count of matching lines */ 77 int hflag; /* -h: don't print filename headers */ 78 int iflag; /* -i: ignore case */ 79 int lflag; /* -l: only show names of files with matches */ 80 int nflag; /* -n: show line numbers in front of matching lines */ 81 int oflag; /* -o: print each match */ 82 int qflag; /* -q: quiet mode (don't output anything) */ 83 int sflag; /* -s: silent mode (ignore errors) */ 84 int vflag; /* -v: only show non-matching lines */ 85 int wflag; /* -w: pattern must start and end on word boundaries */ 86 int xflag; /* -x: pattern must match entire line */ 87 int lbflag; /* --line-buffered */ 88 89 int binbehave = BIN_FILE_BIN; 90 91 enum { 92 BIN_OPT = CHAR_MAX + 1, 93 HELP_OPT, 94 MMAP_OPT, 95 LINEBUF_OPT 96 }; 97 98 /* Housekeeping */ 99 int first; /* flag whether or not this is our first match */ 100 int tail; /* lines left to print */ 101 int file_err; /* file reading error */ 102 103 struct patfile { 104 const char *pf_file; 105 SLIST_ENTRY(patfile) pf_next; 106 }; 107 SLIST_HEAD(, patfile) patfilelh; 108 109 extern char *__progname; 110 111 static void __dead 112 usage(void) 113 { 114 fprintf(stderr, 115 #ifdef NOZ 116 "usage: %s [-abcEFGHhIiLlnoqRsUVvwx] [-A num] [-B num] [-C[num]]\n" 117 #else 118 "usage: %s [-abcEFGHhIiLlnoqRsUVvwxZ] [-A num] [-B num] [-C[num]]\n" 119 #endif 120 "\t[-e pattern] [-f file] [--binary-files=value] [--context[=num]]\n" 121 "\t[--line-buffered] [pattern] [file ...]\n", __progname); 122 exit(2); 123 } 124 125 #ifdef NOZ 126 static const char *optstr = "0123456789A:B:CEFGHILRUVabce:f:hilnoqrsuvwxy"; 127 #else 128 static const char *optstr = "0123456789A:B:CEFGHILRUVZabce:f:hilnoqrsuvwxy"; 129 #endif 130 131 struct option long_options[] = 132 { 133 {"binary-files", required_argument, NULL, BIN_OPT}, 134 {"help", no_argument, NULL, HELP_OPT}, 135 {"mmap", no_argument, NULL, MMAP_OPT}, 136 {"line-buffered", no_argument, NULL, LINEBUF_OPT}, 137 {"after-context", required_argument, NULL, 'A'}, 138 {"before-context", required_argument, NULL, 'B'}, 139 {"context", optional_argument, NULL, 'C'}, 140 {"devices", required_argument, NULL, 'D'}, 141 {"extended-regexp", no_argument, NULL, 'E'}, 142 {"fixed-strings", no_argument, NULL, 'F'}, 143 {"basic-regexp", no_argument, NULL, 'G'}, 144 {"with-filename", no_argument, NULL, 'H'}, 145 {"binary", no_argument, NULL, 'U'}, 146 {"version", no_argument, NULL, 'V'}, 147 {"text", no_argument, NULL, 'a'}, 148 {"byte-offset", no_argument, NULL, 'b'}, 149 {"count", no_argument, NULL, 'c'}, 150 {"regexp", required_argument, NULL, 'e'}, 151 {"file", required_argument, NULL, 'f'}, 152 {"no-filename", no_argument, NULL, 'h'}, 153 {"ignore-case", no_argument, NULL, 'i'}, 154 {"files-without-match", no_argument, NULL, 'L'}, 155 {"files-with-matches", no_argument, NULL, 'l'}, 156 {"line-number", no_argument, NULL, 'n'}, 157 {"quiet", no_argument, NULL, 'q'}, 158 {"silent", no_argument, NULL, 'q'}, 159 {"recursive", no_argument, NULL, 'r'}, 160 {"no-messages", no_argument, NULL, 's'}, 161 {"revert-match", no_argument, NULL, 'v'}, 162 {"word-regexp", no_argument, NULL, 'w'}, 163 {"line-regexp", no_argument, NULL, 'x'}, 164 {"unix-byte-offsets", no_argument, NULL, 'u'}, 165 #ifndef NOZ 166 {"decompress", no_argument, NULL, 'Z'}, 167 #endif 168 {NULL, no_argument, NULL, 0} 169 }; 170 171 172 static void 173 add_pattern(char *pat, size_t len) 174 { 175 if (!xflag && (len == 0 || matchall)) { 176 matchall = 1; 177 return; 178 } 179 if (patterns == pattern_sz) { 180 pattern_sz *= 2; 181 pattern = grep_realloc(pattern, ++pattern_sz * sizeof(*pattern)); 182 } 183 if (len > 0 && pat[len - 1] == '\n') 184 --len; 185 /* pat may not be NUL-terminated */ 186 if (wflag && !Fflag) { 187 int bol = 0, eol = 0, extra; 188 if (pat[0] == '^') 189 bol = 1; 190 if (len > 0 && pat[len - 1] == '$') 191 eol = 1; 192 extra = Eflag ? 2 : 4; 193 pattern[patterns] = grep_malloc(len + 15 + extra); 194 snprintf(pattern[patterns], len + 15 + extra, 195 "%s[[:<:]]%s%.*s%s[[:>:]]%s", 196 bol ? "^" : "", 197 Eflag ? "(" : "\\(", 198 (int)len - bol - eol, pat + bol, 199 Eflag ? ")" : "\\)", 200 eol ? "$" : ""); 201 len += 14 + extra; 202 } else { 203 pattern[patterns] = grep_malloc(len + 1); 204 memcpy(pattern[patterns], pat, len); 205 pattern[patterns][len] = '\0'; 206 } 207 ++patterns; 208 } 209 210 static void 211 add_patterns(char *pats) 212 { 213 char *nl; 214 215 while ((nl = strchr(pats, '\n')) != NULL) { 216 add_pattern(pats, nl - pats); 217 pats = nl + 1; 218 } 219 add_pattern(pats, strlen(pats)); 220 } 221 222 static void 223 read_patterns(const char *fn) 224 { 225 FILE *f; 226 char *line; 227 size_t len; 228 229 if ((f = fopen(fn, "r")) == NULL) 230 err(2, "%s", fn); 231 while ((line = fgetln(f, &len)) != NULL) 232 add_pattern(line, *line == '\n' ? 0 : len); 233 if (ferror(f)) 234 err(2, "%s", fn); 235 fclose(f); 236 } 237 238 int 239 main(int argc, char *argv[]) 240 { 241 int c, lastc, prevoptind, newarg, i, needpattern, exprs, expr_sz; 242 struct patfile *patfile, *pf_next; 243 long l; 244 char *ep, **expr; 245 246 SLIST_INIT(&patfilelh); 247 switch (__progname[0]) { 248 case 'e': 249 Eflag++; 250 break; 251 case 'f': 252 Fflag++; 253 break; 254 case 'g': 255 Gflag++; 256 break; 257 #ifndef NOZ 258 case 'z': 259 Zflag++; 260 switch(__progname[1]) { 261 case 'e': 262 Eflag++; 263 break; 264 case 'f': 265 Fflag++; 266 break; 267 case 'g': 268 Gflag++; 269 break; 270 } 271 break; 272 #endif 273 } 274 275 lastc = '\0'; 276 newarg = 1; 277 prevoptind = 1; 278 needpattern = 1; 279 expr_sz = exprs = 0; 280 expr = NULL; 281 while ((c = getopt_long(argc, argv, optstr, 282 long_options, NULL)) != -1) { 283 switch (c) { 284 case '0': case '1': case '2': case '3': case '4': 285 case '5': case '6': case '7': case '8': case '9': 286 if (newarg || !isdigit(lastc)) 287 Aflag = 0; 288 else if (Aflag > INT_MAX / 10) 289 errx(2, "context out of range"); 290 Aflag = Bflag = (Aflag * 10) + (c - '0'); 291 break; 292 case 'A': 293 case 'B': 294 l = strtol(optarg, &ep, 10); 295 if (ep == optarg || *ep != '\0' || 296 l <= 0 || l >= INT_MAX) 297 errx(2, "context out of range"); 298 if (c == 'A') 299 Aflag = (int)l; 300 else 301 Bflag = (int)l; 302 break; 303 case 'C': 304 if (optarg == NULL) 305 Aflag = Bflag = 2; 306 else { 307 l = strtol(optarg, &ep, 10); 308 if (ep == optarg || *ep != '\0' || 309 l <= 0 || l >= INT_MAX) 310 errx(2, "context out of range"); 311 Aflag = Bflag = (int)l; 312 } 313 break; 314 case 'E': 315 Fflag = Gflag = 0; 316 Eflag++; 317 break; 318 case 'F': 319 Eflag = Gflag = 0; 320 Fflag++; 321 break; 322 case 'G': 323 Eflag = Fflag = 0; 324 Gflag++; 325 break; 326 case 'H': 327 Hflag++; 328 break; 329 case 'I': 330 binbehave = BIN_FILE_SKIP; 331 break; 332 case 'L': 333 lflag = 0; 334 Lflag = qflag = 1; 335 break; 336 case 'R': 337 case 'r': 338 Rflag++; 339 break; 340 case 'U': 341 binbehave = BIN_FILE_BIN; 342 break; 343 case 'V': 344 fprintf(stderr, "grep version %u.%u\n", VER_MAJ, VER_MIN); 345 exit(0); 346 break; 347 #ifndef NOZ 348 case 'Z': 349 Zflag++; 350 break; 351 #endif 352 case 'a': 353 binbehave = BIN_FILE_TEXT; 354 break; 355 case 'b': 356 bflag = 1; 357 break; 358 case 'c': 359 cflag = 1; 360 break; 361 case 'e': 362 /* defer adding of expressions until all arguments are parsed */ 363 if (exprs == expr_sz) { 364 expr_sz *= 2; 365 expr = grep_realloc(expr, ++expr_sz * sizeof(*expr)); 366 } 367 needpattern = 0; 368 expr[exprs] = optarg; 369 ++exprs; 370 break; 371 case 'f': 372 patfile = grep_malloc(sizeof(*patfile)); 373 patfile->pf_file = optarg; 374 SLIST_INSERT_HEAD(&patfilelh, patfile, pf_next); 375 needpattern = 0; 376 break; 377 case 'h': 378 hflag = 1; 379 break; 380 case 'i': 381 case 'y': 382 iflag = 1; 383 cflags |= REG_ICASE; 384 break; 385 case 'l': 386 Lflag = 0; 387 lflag = qflag = 1; 388 break; 389 case 'n': 390 nflag = 1; 391 break; 392 case 'o': 393 oflag = 1; 394 break; 395 case 'q': 396 qflag = 1; 397 break; 398 case 's': 399 sflag = 1; 400 break; 401 case 'v': 402 vflag = 1; 403 break; 404 case 'w': 405 wflag = 1; 406 break; 407 case 'x': 408 xflag = 1; 409 break; 410 case BIN_OPT: 411 if (strcmp("binary", optarg) == 0) 412 binbehave = BIN_FILE_BIN; 413 else if (strcmp("without-match", optarg) == 0) 414 binbehave = BIN_FILE_SKIP; 415 else if (strcmp("text", optarg) == 0) 416 binbehave = BIN_FILE_TEXT; 417 else 418 errx(2, "Unknown binary-files option"); 419 break; 420 case 'u': 421 case MMAP_OPT: 422 /* default, compatibility */ 423 break; 424 case LINEBUF_OPT: 425 lbflag = 1; 426 break; 427 case HELP_OPT: 428 default: 429 usage(); 430 } 431 lastc = c; 432 newarg = optind != prevoptind; 433 prevoptind = optind; 434 } 435 argc -= optind; 436 argv += optind; 437 438 for (i = 0; i < exprs; i++) 439 add_patterns(expr[i]); 440 free(expr); 441 expr = NULL; 442 443 for (patfile = SLIST_FIRST(&patfilelh); patfile != NULL; 444 patfile = pf_next) { 445 pf_next = SLIST_NEXT(patfile, pf_next); 446 read_patterns(patfile->pf_file); 447 free(patfile); 448 } 449 450 if (argc == 0 && needpattern) 451 usage(); 452 453 if (argc != 0 && needpattern) { 454 add_patterns(*argv); 455 --argc; 456 ++argv; 457 } 458 459 if (Eflag) 460 cflags |= REG_EXTENDED; 461 if (Fflag) 462 cflags |= REG_NOSPEC; 463 #ifdef SMALL 464 /* Sorry, this won't work */ 465 if (Fflag && wflag) 466 errx(1, "Can't use small fgrep with -w"); 467 #endif 468 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); 469 r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); 470 for (i = 0; i < patterns; ++i) { 471 /* Check if cheating is allowed (always is for fgrep). */ 472 #ifndef SMALL 473 if (Fflag) { 474 fgrepcomp(&fg_pattern[i], (unsigned char *)pattern[i]); 475 } else 476 #endif 477 { 478 if (fastcomp(&fg_pattern[i], pattern[i])) { 479 /* Fall back to full regex library */ 480 c = regcomp(&r_pattern[i], pattern[i], cflags); 481 if (c != 0) { 482 regerror(c, &r_pattern[i], re_error, 483 RE_ERROR_BUF); 484 errx(2, "%s", re_error); 485 } 486 } 487 } 488 } 489 490 if (lbflag) 491 setlinebuf(stdout); 492 493 if ((argc == 0 || argc == 1) && !Rflag && !Hflag) 494 hflag = 1; 495 496 if (argc == 0) 497 exit(!procfile(NULL)); 498 499 if (Rflag) 500 c = grep_tree(argv); 501 else 502 for (c = 0; argc--; ++argv) 503 c += procfile(*argv); 504 505 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1)); 506 } 507