1 /* $OpenBSD: grep.c,v 1.39 2007/09/02 15:19:32 deraadt Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/types.h> 30 #include <sys/limits.h> 31 #include <sys/stat.h> 32 #include <sys/queue.h> 33 34 #include <ctype.h> 35 #include <err.h> 36 #include <errno.h> 37 #include <getopt.h> 38 #include <regex.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <unistd.h> 43 44 #include "grep.h" 45 46 /* Flags passed to regcomp() and regexec() */ 47 int cflags; 48 int eflags = REG_STARTEND; 49 50 int matchall; /* shortcut */ 51 int patterns, pattern_sz; 52 char **pattern; 53 regex_t *r_pattern; 54 fastgrep_t *fg_pattern; 55 56 /* For regex errors */ 57 char re_error[RE_ERROR_BUF + 1]; 58 59 /* Command-line flags */ 60 int Aflag; /* -A x: print x lines trailing each match */ 61 int Bflag; /* -B x: print x lines leading each match */ 62 int Eflag; /* -E: interpret pattern as extended regexp */ 63 int Fflag; /* -F: interpret pattern as list of fixed strings */ 64 int Gflag; /* -G: interpret pattern as basic regexp */ 65 int Hflag; /* -H: if -R, follow explicitly listed symlinks */ 66 int Lflag; /* -L: only show names of files with no matches */ 67 int Pflag; /* -P: if -R, no symlinks are followed */ 68 int Rflag; /* -R: recursively search directory trees */ 69 int Sflag; /* -S: if -R, follow all symlinks */ 70 #ifndef NOZ 71 int Zflag; /* -Z: decompress input before processing */ 72 #endif 73 int bflag; /* -b: show block numbers for each match */ 74 int cflag; /* -c: only show a count of matching lines */ 75 int hflag; /* -h: don't print filename headers */ 76 int iflag; /* -i: ignore case */ 77 int lflag; /* -l: only show names of files with matches */ 78 int nflag; /* -n: show line numbers in front of matching lines */ 79 int oflag; /* -o: always print file name */ 80 int qflag; /* -q: quiet mode (don't output anything) */ 81 int sflag; /* -s: silent mode (ignore errors) */ 82 int vflag; /* -v: only show non-matching lines */ 83 int wflag; /* -w: pattern must start and end on word boundaries */ 84 int xflag; /* -x: pattern must match entire line */ 85 int lbflag; /* --line-buffered */ 86 87 int binbehave = BIN_FILE_BIN; 88 89 enum { 90 BIN_OPT = CHAR_MAX + 1, 91 HELP_OPT, 92 MMAP_OPT, 93 LINEBUF_OPT 94 }; 95 96 /* Housekeeping */ 97 int first; /* flag whether or not this is our first match */ 98 int tail; /* lines left to print */ 99 100 struct patfile { 101 const char *pf_file; 102 SLIST_ENTRY(patfile) pf_next; 103 }; 104 SLIST_HEAD(, patfile) patfilelh; 105 106 extern char *__progname; 107 108 static void 109 usage(void) 110 { 111 fprintf(stderr, 112 #ifdef NOZ 113 "usage: %s [-abcEFGHhIiLlnoPqRSsUVvwx] [-A num] [-B num] [-C[num]]\n" 114 #else 115 "usage: %s [-abcEFGHhIiLlnoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n" 116 #endif 117 "\t[-e pattern] [-f file] [--binary-files=value] [--context[=num]]\n" 118 "\t[--line-buffered] [pattern] [file ...]\n", __progname); 119 exit(2); 120 } 121 122 #ifdef NOZ 123 static char *optstr = "0123456789A:B:CEFGHILPSRUVabce:f:hilnoqrsuvwxy"; 124 #else 125 static char *optstr = "0123456789A:B:CEFGHILPSRUVZabce:f:hilnoqrsuvwxy"; 126 #endif 127 128 struct option long_options[] = 129 { 130 {"binary-files", required_argument, NULL, BIN_OPT}, 131 {"help", no_argument, NULL, HELP_OPT}, 132 {"mmap", no_argument, NULL, MMAP_OPT}, 133 {"line-buffered", no_argument, NULL, LINEBUF_OPT}, 134 {"after-context", required_argument, NULL, 'A'}, 135 {"before-context", required_argument, NULL, 'B'}, 136 {"context", optional_argument, NULL, 'C'}, 137 {"devices", required_argument, NULL, 'D'}, 138 {"extended-regexp", no_argument, NULL, 'E'}, 139 {"fixed-strings", no_argument, NULL, 'F'}, 140 {"basic-regexp", no_argument, NULL, 'G'}, 141 {"binary", no_argument, NULL, 'U'}, 142 {"version", no_argument, NULL, 'V'}, 143 {"text", no_argument, NULL, 'a'}, 144 {"byte-offset", no_argument, NULL, 'b'}, 145 {"count", no_argument, NULL, 'c'}, 146 {"regexp", required_argument, NULL, 'e'}, 147 {"file", required_argument, NULL, 'f'}, 148 {"no-filename", no_argument, NULL, 'h'}, 149 {"ignore-case", no_argument, NULL, 'i'}, 150 {"files-without-match", no_argument, NULL, 'L'}, 151 {"files-with-matches", no_argument, NULL, 'l'}, 152 {"line-number", no_argument, NULL, 'n'}, 153 {"quiet", no_argument, NULL, 'q'}, 154 {"silent", no_argument, NULL, 'q'}, 155 {"recursive", no_argument, NULL, 'r'}, 156 {"no-messages", no_argument, NULL, 's'}, 157 {"revert-match", no_argument, NULL, 'v'}, 158 {"word-regexp", no_argument, NULL, 'w'}, 159 {"line-regexp", no_argument, NULL, 'x'}, 160 {"unix-byte-offsets", no_argument, NULL, 'u'}, 161 #ifndef NOZ 162 {"decompress", no_argument, NULL, 'Z'}, 163 #endif 164 {NULL, no_argument, NULL, 0} 165 }; 166 167 168 static void 169 add_pattern(char *pat, size_t len) 170 { 171 if (!xflag && (len == 0 || matchall)) { 172 matchall = 1; 173 return; 174 } 175 if (patterns == pattern_sz) { 176 pattern_sz *= 2; 177 pattern = grep_realloc(pattern, ++pattern_sz * sizeof(*pattern)); 178 } 179 if (len > 0 && pat[len - 1] == '\n') 180 --len; 181 /* pat may not be NUL-terminated */ 182 if (wflag && !Fflag) { 183 int bol = 0, eol = 0, extra; 184 if (pat[0] == '^') 185 bol = 1; 186 if (len > 0 && pat[len - 1] == '$') 187 eol = 1; 188 extra = Eflag ? 2 : 4; 189 pattern[patterns] = grep_malloc(len + 15 + extra); 190 snprintf(pattern[patterns], len + 15 + extra, 191 "%s[[:<:]]%s%.*s%s[[:>:]]%s", 192 bol ? "^" : "", 193 Eflag ? "(" : "\\(", 194 (int)len - bol - eol, pat + bol, 195 Eflag ? ")" : "\\)", 196 eol ? "$" : ""); 197 len += 14 + extra; 198 } else { 199 pattern[patterns] = grep_malloc(len + 1); 200 memcpy(pattern[patterns], pat, len); 201 pattern[patterns][len] = '\0'; 202 } 203 ++patterns; 204 } 205 206 static void 207 add_patterns(char *pats) 208 { 209 char *nl; 210 211 while ((nl = strchr(pats, '\n')) != NULL) { 212 add_pattern(pats, nl - pats); 213 pats = nl + 1; 214 } 215 add_pattern(pats, strlen(pats)); 216 } 217 218 static void 219 read_patterns(const char *fn) 220 { 221 FILE *f; 222 char *line; 223 size_t len; 224 225 if ((f = fopen(fn, "r")) == NULL) 226 err(2, "%s", fn); 227 while ((line = fgetln(f, &len)) != NULL) 228 add_pattern(line, *line == '\n' ? 0 : len); 229 if (ferror(f)) 230 err(2, "%s", fn); 231 fclose(f); 232 } 233 234 int 235 main(int argc, char *argv[]) 236 { 237 int c, lastc, prevoptind, newarg, i, needpattern; 238 struct patfile *patfile, *pf_next; 239 long l; 240 char *ep; 241 242 SLIST_INIT(&patfilelh); 243 switch (__progname[0]) { 244 case 'e': 245 Eflag++; 246 break; 247 case 'f': 248 Fflag++; 249 break; 250 case 'g': 251 Gflag++; 252 break; 253 #ifndef NOZ 254 case 'z': 255 Zflag++; 256 switch(__progname[1]) { 257 case 'e': 258 Eflag++; 259 break; 260 case 'f': 261 Fflag++; 262 break; 263 case 'g': 264 Gflag++; 265 break; 266 } 267 break; 268 #endif 269 } 270 271 lastc = '\0'; 272 newarg = 1; 273 prevoptind = 1; 274 needpattern = 1; 275 while ((c = getopt_long(argc, argv, optstr, 276 long_options, NULL)) != -1) { 277 switch (c) { 278 case '0': case '1': case '2': case '3': case '4': 279 case '5': case '6': case '7': case '8': case '9': 280 if (newarg || !isdigit(lastc)) 281 Aflag = 0; 282 else if (Aflag > INT_MAX / 10) 283 errx(2, "context out of range"); 284 Aflag = Bflag = (Aflag * 10) + (c - '0'); 285 break; 286 case 'A': 287 case 'B': 288 l = strtol(optarg, &ep, 10); 289 if (ep == optarg || *ep != '\0' || 290 l <= 0 || l >= INT_MAX) 291 errx(2, "context out of range"); 292 if (c == 'A') 293 Aflag = (int)l; 294 else 295 Bflag = (int)l; 296 break; 297 case 'C': 298 if (optarg == NULL) 299 Aflag = Bflag = 2; 300 else { 301 l = strtol(optarg, &ep, 10); 302 if (ep == optarg || *ep != '\0' || 303 l <= 0 || l >= INT_MAX) 304 errx(2, "context out of range"); 305 Aflag = Bflag = (int)l; 306 } 307 break; 308 case 'E': 309 Fflag = Gflag = 0; 310 Eflag++; 311 break; 312 case 'F': 313 Eflag = Gflag = 0; 314 Fflag++; 315 break; 316 case 'G': 317 Eflag = Fflag = 0; 318 Gflag++; 319 break; 320 case 'H': 321 Hflag++; 322 break; 323 case 'I': 324 binbehave = BIN_FILE_SKIP; 325 break; 326 case 'L': 327 lflag = 0; 328 Lflag = qflag = 1; 329 break; 330 case 'P': 331 Pflag++; 332 break; 333 case 'S': 334 Sflag++; 335 break; 336 case 'R': 337 case 'r': 338 Rflag++; 339 oflag++; 340 break; 341 case 'U': 342 binbehave = BIN_FILE_BIN; 343 break; 344 case 'V': 345 fprintf(stderr, "grep version %u.%u\n", VER_MAJ, VER_MIN); 346 exit(0); 347 break; 348 #ifndef NOZ 349 case 'Z': 350 Zflag++; 351 break; 352 #endif 353 case 'a': 354 binbehave = BIN_FILE_TEXT; 355 break; 356 case 'b': 357 bflag = 1; 358 break; 359 case 'c': 360 cflag = 1; 361 break; 362 case 'e': 363 add_patterns(optarg); 364 needpattern = 0; 365 break; 366 case 'f': 367 patfile = grep_malloc(sizeof(*patfile)); 368 patfile->pf_file = optarg; 369 SLIST_INSERT_HEAD(&patfilelh, patfile, pf_next); 370 needpattern = 0; 371 break; 372 case 'h': 373 oflag = 0; 374 hflag = 1; 375 break; 376 case 'i': 377 case 'y': 378 iflag = 1; 379 cflags |= REG_ICASE; 380 break; 381 case 'l': 382 Lflag = 0; 383 lflag = qflag = 1; 384 break; 385 case 'n': 386 nflag = 1; 387 break; 388 case 'o': 389 hflag = 0; 390 oflag = 1; 391 break; 392 case 'q': 393 qflag = 1; 394 break; 395 case 's': 396 sflag = 1; 397 break; 398 case 'v': 399 vflag = 1; 400 break; 401 case 'w': 402 wflag = 1; 403 break; 404 case 'x': 405 xflag = 1; 406 break; 407 case BIN_OPT: 408 if (strcmp("binary", optarg) == 0) 409 binbehave = BIN_FILE_BIN; 410 else if (strcmp("without-match", optarg) == 0) 411 binbehave = BIN_FILE_SKIP; 412 else if (strcmp("text", optarg) == 0) 413 binbehave = BIN_FILE_TEXT; 414 else 415 errx(2, "Unknown binary-files option"); 416 break; 417 case 'u': 418 case MMAP_OPT: 419 /* default, compatibility */ 420 break; 421 case LINEBUF_OPT: 422 lbflag = 1; 423 break; 424 case HELP_OPT: 425 default: 426 usage(); 427 } 428 lastc = c; 429 newarg = optind != prevoptind; 430 prevoptind = optind; 431 } 432 argc -= optind; 433 argv += optind; 434 435 for (patfile = SLIST_FIRST(&patfilelh); patfile != NULL; 436 patfile = pf_next) { 437 pf_next = SLIST_NEXT(patfile, pf_next); 438 read_patterns(patfile->pf_file); 439 free(patfile); 440 } 441 442 if (argc == 0 && needpattern) 443 usage(); 444 445 if (argc != 0 && needpattern) { 446 add_patterns(*argv); 447 --argc; 448 ++argv; 449 } 450 451 if (Eflag) 452 cflags |= REG_EXTENDED; 453 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); 454 r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); 455 for (i = 0; i < patterns; ++i) { 456 /* Check if cheating is allowed (always is for fgrep). */ 457 if (Fflag) { 458 fgrepcomp(&fg_pattern[i], pattern[i]); 459 } else { 460 if (fastcomp(&fg_pattern[i], pattern[i])) { 461 /* Fall back to full regex library */ 462 c = regcomp(&r_pattern[i], pattern[i], cflags); 463 if (c != 0) { 464 regerror(c, &r_pattern[i], re_error, 465 RE_ERROR_BUF); 466 errx(2, "%s", re_error); 467 } 468 } 469 } 470 } 471 472 if (lbflag) 473 setlinebuf(stdout); 474 475 if ((argc == 0 || argc == 1) && !oflag) 476 hflag = 1; 477 478 if (argc == 0) 479 exit(!procfile(NULL)); 480 481 if (Rflag) 482 c = grep_tree(argv); 483 else 484 for (c = 0; argc--; ++argv) 485 c += procfile(*argv); 486 487 exit(!c); 488 } 489