1 /* $OpenBSD: grep.c,v 1.44 2011/07/08 01:20:24 tedu Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/types.h> 30 #include <sys/limits.h> 31 #include <sys/stat.h> 32 #include <sys/queue.h> 33 34 #include <ctype.h> 35 #include <err.h> 36 #include <errno.h> 37 #include <getopt.h> 38 #include <regex.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <unistd.h> 43 44 #include "grep.h" 45 46 /* Flags passed to regcomp() and regexec() */ 47 int cflags; 48 int eflags = REG_STARTEND; 49 50 int matchall; /* shortcut */ 51 int patterns, pattern_sz; 52 char **pattern; 53 regex_t *r_pattern; 54 fastgrep_t *fg_pattern; 55 56 /* For regex errors */ 57 char re_error[RE_ERROR_BUF + 1]; 58 59 /* Command-line flags */ 60 int Aflag; /* -A x: print x lines trailing each match */ 61 int Bflag; /* -B x: print x lines leading each match */ 62 int Eflag; /* -E: interpret pattern as extended regexp */ 63 int Fflag; /* -F: interpret pattern as list of fixed strings */ 64 int Gflag; /* -G: interpret pattern as basic regexp */ 65 int Hflag; /* -H: always print filename header */ 66 int Lflag; /* -L: only show names of files with no matches */ 67 int Rflag; /* -R: recursively search directory trees */ 68 #ifndef NOZ 69 int Zflag; /* -Z: decompress input before processing */ 70 #endif 71 int bflag; /* -b: show block numbers for each match */ 72 int cflag; /* -c: only show a count of matching lines */ 73 int hflag; /* -h: don't print filename headers */ 74 int iflag; /* -i: ignore case */ 75 int lflag; /* -l: only show names of files with matches */ 76 int nflag; /* -n: show line numbers in front of matching lines */ 77 int oflag; /* -o: print each match */ 78 int qflag; /* -q: quiet mode (don't output anything) */ 79 int sflag; /* -s: silent mode (ignore errors) */ 80 int vflag; /* -v: only show non-matching lines */ 81 int wflag; /* -w: pattern must start and end on word boundaries */ 82 int xflag; /* -x: pattern must match entire line */ 83 int lbflag; /* --line-buffered */ 84 85 int binbehave = BIN_FILE_BIN; 86 87 enum { 88 BIN_OPT = CHAR_MAX + 1, 89 HELP_OPT, 90 MMAP_OPT, 91 LINEBUF_OPT 92 }; 93 94 /* Housekeeping */ 95 int first; /* flag whether or not this is our first match */ 96 int tail; /* lines left to print */ 97 98 struct patfile { 99 const char *pf_file; 100 SLIST_ENTRY(patfile) pf_next; 101 }; 102 SLIST_HEAD(, patfile) patfilelh; 103 104 extern char *__progname; 105 106 static void 107 usage(void) 108 { 109 fprintf(stderr, 110 #ifdef NOZ 111 "usage: %s [-abcEFGHhIiLlnoqRsUVvwx] [-A num] [-B num] [-C[num]]\n" 112 #else 113 "usage: %s [-abcEFGHhIiLlnoqRsUVvwxZ] [-A num] [-B num] [-C[num]]\n" 114 #endif 115 "\t[-e pattern] [-f file] [--binary-files=value] [--context[=num]]\n" 116 "\t[--line-buffered] [pattern] [file ...]\n", __progname); 117 exit(2); 118 } 119 120 #ifdef NOZ 121 static char *optstr = "0123456789A:B:CEFGHILRUVabce:f:hilnoqrsuvwxy"; 122 #else 123 static char *optstr = "0123456789A:B:CEFGHILRUVZabce:f:hilnoqrsuvwxy"; 124 #endif 125 126 struct option long_options[] = 127 { 128 {"binary-files", required_argument, NULL, BIN_OPT}, 129 {"help", no_argument, NULL, HELP_OPT}, 130 {"mmap", no_argument, NULL, MMAP_OPT}, 131 {"line-buffered", no_argument, NULL, LINEBUF_OPT}, 132 {"after-context", required_argument, NULL, 'A'}, 133 {"before-context", required_argument, NULL, 'B'}, 134 {"context", optional_argument, NULL, 'C'}, 135 {"devices", required_argument, NULL, 'D'}, 136 {"extended-regexp", no_argument, NULL, 'E'}, 137 {"fixed-strings", no_argument, NULL, 'F'}, 138 {"basic-regexp", no_argument, NULL, 'G'}, 139 {"with-filename", no_argument, NULL, 'H'}, 140 {"binary", no_argument, NULL, 'U'}, 141 {"version", no_argument, NULL, 'V'}, 142 {"text", no_argument, NULL, 'a'}, 143 {"byte-offset", no_argument, NULL, 'b'}, 144 {"count", no_argument, NULL, 'c'}, 145 {"regexp", required_argument, NULL, 'e'}, 146 {"file", required_argument, NULL, 'f'}, 147 {"no-filename", no_argument, NULL, 'h'}, 148 {"ignore-case", no_argument, NULL, 'i'}, 149 {"files-without-match", no_argument, NULL, 'L'}, 150 {"files-with-matches", no_argument, NULL, 'l'}, 151 {"line-number", no_argument, NULL, 'n'}, 152 {"quiet", no_argument, NULL, 'q'}, 153 {"silent", no_argument, NULL, 'q'}, 154 {"recursive", no_argument, NULL, 'r'}, 155 {"no-messages", no_argument, NULL, 's'}, 156 {"revert-match", no_argument, NULL, 'v'}, 157 {"word-regexp", no_argument, NULL, 'w'}, 158 {"line-regexp", no_argument, NULL, 'x'}, 159 {"unix-byte-offsets", no_argument, NULL, 'u'}, 160 #ifndef NOZ 161 {"decompress", no_argument, NULL, 'Z'}, 162 #endif 163 {NULL, no_argument, NULL, 0} 164 }; 165 166 167 static void 168 add_pattern(char *pat, size_t len) 169 { 170 if (!xflag && (len == 0 || matchall)) { 171 matchall = 1; 172 return; 173 } 174 if (patterns == pattern_sz) { 175 pattern_sz *= 2; 176 pattern = grep_realloc(pattern, ++pattern_sz * sizeof(*pattern)); 177 } 178 if (len > 0 && pat[len - 1] == '\n') 179 --len; 180 /* pat may not be NUL-terminated */ 181 if (wflag && !Fflag) { 182 int bol = 0, eol = 0, extra; 183 if (pat[0] == '^') 184 bol = 1; 185 if (len > 0 && pat[len - 1] == '$') 186 eol = 1; 187 extra = Eflag ? 2 : 4; 188 pattern[patterns] = grep_malloc(len + 15 + extra); 189 snprintf(pattern[patterns], len + 15 + extra, 190 "%s[[:<:]]%s%.*s%s[[:>:]]%s", 191 bol ? "^" : "", 192 Eflag ? "(" : "\\(", 193 (int)len - bol - eol, pat + bol, 194 Eflag ? ")" : "\\)", 195 eol ? "$" : ""); 196 len += 14 + extra; 197 } else { 198 pattern[patterns] = grep_malloc(len + 1); 199 memcpy(pattern[patterns], pat, len); 200 pattern[patterns][len] = '\0'; 201 } 202 ++patterns; 203 } 204 205 static void 206 add_patterns(char *pats) 207 { 208 char *nl; 209 210 while ((nl = strchr(pats, '\n')) != NULL) { 211 add_pattern(pats, nl - pats); 212 pats = nl + 1; 213 } 214 add_pattern(pats, strlen(pats)); 215 } 216 217 static void 218 read_patterns(const char *fn) 219 { 220 FILE *f; 221 char *line; 222 size_t len; 223 224 if ((f = fopen(fn, "r")) == NULL) 225 err(2, "%s", fn); 226 while ((line = fgetln(f, &len)) != NULL) 227 add_pattern(line, *line == '\n' ? 0 : len); 228 if (ferror(f)) 229 err(2, "%s", fn); 230 fclose(f); 231 } 232 233 int 234 main(int argc, char *argv[]) 235 { 236 int c, lastc, prevoptind, newarg, i, needpattern, exprs, expr_sz; 237 struct patfile *patfile, *pf_next; 238 long l; 239 char *ep, **expr; 240 241 SLIST_INIT(&patfilelh); 242 switch (__progname[0]) { 243 case 'e': 244 Eflag++; 245 break; 246 case 'f': 247 Fflag++; 248 break; 249 case 'g': 250 Gflag++; 251 break; 252 #ifndef NOZ 253 case 'z': 254 Zflag++; 255 switch(__progname[1]) { 256 case 'e': 257 Eflag++; 258 break; 259 case 'f': 260 Fflag++; 261 break; 262 case 'g': 263 Gflag++; 264 break; 265 } 266 break; 267 #endif 268 } 269 270 lastc = '\0'; 271 newarg = 1; 272 prevoptind = 1; 273 needpattern = 1; 274 expr_sz = exprs = 0; 275 expr = NULL; 276 while ((c = getopt_long(argc, argv, optstr, 277 long_options, NULL)) != -1) { 278 switch (c) { 279 case '0': case '1': case '2': case '3': case '4': 280 case '5': case '6': case '7': case '8': case '9': 281 if (newarg || !isdigit(lastc)) 282 Aflag = 0; 283 else if (Aflag > INT_MAX / 10) 284 errx(2, "context out of range"); 285 Aflag = Bflag = (Aflag * 10) + (c - '0'); 286 break; 287 case 'A': 288 case 'B': 289 l = strtol(optarg, &ep, 10); 290 if (ep == optarg || *ep != '\0' || 291 l <= 0 || l >= INT_MAX) 292 errx(2, "context out of range"); 293 if (c == 'A') 294 Aflag = (int)l; 295 else 296 Bflag = (int)l; 297 break; 298 case 'C': 299 if (optarg == NULL) 300 Aflag = Bflag = 2; 301 else { 302 l = strtol(optarg, &ep, 10); 303 if (ep == optarg || *ep != '\0' || 304 l <= 0 || l >= INT_MAX) 305 errx(2, "context out of range"); 306 Aflag = Bflag = (int)l; 307 } 308 break; 309 case 'E': 310 Fflag = Gflag = 0; 311 Eflag++; 312 break; 313 case 'F': 314 Eflag = Gflag = 0; 315 Fflag++; 316 break; 317 case 'G': 318 Eflag = Fflag = 0; 319 Gflag++; 320 break; 321 case 'H': 322 Hflag++; 323 break; 324 case 'I': 325 binbehave = BIN_FILE_SKIP; 326 break; 327 case 'L': 328 lflag = 0; 329 Lflag = qflag = 1; 330 break; 331 case 'R': 332 case 'r': 333 Rflag++; 334 break; 335 case 'U': 336 binbehave = BIN_FILE_BIN; 337 break; 338 case 'V': 339 fprintf(stderr, "grep version %u.%u\n", VER_MAJ, VER_MIN); 340 exit(0); 341 break; 342 #ifndef NOZ 343 case 'Z': 344 Zflag++; 345 break; 346 #endif 347 case 'a': 348 binbehave = BIN_FILE_TEXT; 349 break; 350 case 'b': 351 bflag = 1; 352 break; 353 case 'c': 354 cflag = 1; 355 break; 356 case 'e': 357 /* defer adding of expressions until all arguments are parsed */ 358 if (exprs == expr_sz) { 359 expr_sz *= 2; 360 expr = grep_realloc(expr, ++expr_sz * sizeof(*expr)); 361 } 362 needpattern = 0; 363 expr[exprs] = optarg; 364 ++exprs; 365 break; 366 case 'f': 367 patfile = grep_malloc(sizeof(*patfile)); 368 patfile->pf_file = optarg; 369 SLIST_INSERT_HEAD(&patfilelh, patfile, pf_next); 370 needpattern = 0; 371 break; 372 case 'h': 373 hflag = 1; 374 break; 375 case 'i': 376 case 'y': 377 iflag = 1; 378 cflags |= REG_ICASE; 379 break; 380 case 'l': 381 Lflag = 0; 382 lflag = qflag = 1; 383 break; 384 case 'n': 385 nflag = 1; 386 break; 387 case 'o': 388 oflag = 1; 389 break; 390 case 'q': 391 qflag = 1; 392 break; 393 case 's': 394 sflag = 1; 395 break; 396 case 'v': 397 vflag = 1; 398 break; 399 case 'w': 400 wflag = 1; 401 break; 402 case 'x': 403 xflag = 1; 404 break; 405 case BIN_OPT: 406 if (strcmp("binary", optarg) == 0) 407 binbehave = BIN_FILE_BIN; 408 else if (strcmp("without-match", optarg) == 0) 409 binbehave = BIN_FILE_SKIP; 410 else if (strcmp("text", optarg) == 0) 411 binbehave = BIN_FILE_TEXT; 412 else 413 errx(2, "Unknown binary-files option"); 414 break; 415 case 'u': 416 case MMAP_OPT: 417 /* default, compatibility */ 418 break; 419 case LINEBUF_OPT: 420 lbflag = 1; 421 break; 422 case HELP_OPT: 423 default: 424 usage(); 425 } 426 lastc = c; 427 newarg = optind != prevoptind; 428 prevoptind = optind; 429 } 430 argc -= optind; 431 argv += optind; 432 433 for (i = 0; i < exprs; i++) 434 add_patterns(expr[i]); 435 free(expr); 436 expr = NULL; 437 438 for (patfile = SLIST_FIRST(&patfilelh); patfile != NULL; 439 patfile = pf_next) { 440 pf_next = SLIST_NEXT(patfile, pf_next); 441 read_patterns(patfile->pf_file); 442 free(patfile); 443 } 444 445 if (argc == 0 && needpattern) 446 usage(); 447 448 if (argc != 0 && needpattern) { 449 add_patterns(*argv); 450 --argc; 451 ++argv; 452 } 453 454 if (Eflag) 455 cflags |= REG_EXTENDED; 456 if (Fflag) 457 cflags |= REG_NOSPEC; 458 #ifdef SMALL 459 /* Sorry, this won't work */ 460 if (Fflag && wflag) 461 errx(1, "Can't use small fgrep with -w"); 462 #endif 463 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); 464 r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); 465 for (i = 0; i < patterns; ++i) { 466 /* Check if cheating is allowed (always is for fgrep). */ 467 #ifndef SMALL 468 if (Fflag) { 469 fgrepcomp(&fg_pattern[i], pattern[i]); 470 } else 471 #endif 472 { 473 if (fastcomp(&fg_pattern[i], pattern[i])) { 474 /* Fall back to full regex library */ 475 c = regcomp(&r_pattern[i], pattern[i], cflags); 476 if (c != 0) { 477 regerror(c, &r_pattern[i], re_error, 478 RE_ERROR_BUF); 479 errx(2, "%s", re_error); 480 } 481 } 482 } 483 } 484 485 if (lbflag) 486 setlinebuf(stdout); 487 488 if ((argc == 0 || argc == 1) && !Rflag && !Hflag) 489 hflag = 1; 490 491 if (argc == 0) 492 exit(!procfile(NULL)); 493 494 if (Rflag) 495 c = grep_tree(argv); 496 else 497 for (c = 0; argc--; ++argv) 498 c += procfile(*argv); 499 500 exit(!c); 501 } 502