1 /* $NetBSD: grep.c,v 1.3 2006/05/15 21:12:21 rillig Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 */ 29 30 31 32 #include <sys/cdefs.h> 33 #ifndef lint 34 __RCSID("$NetBSD: grep.c,v 1.3 2006/05/15 21:12:21 rillig Exp $"); 35 #endif /* not lint */ 36 37 #include <sys/types.h> 38 #include <sys/stat.h> 39 40 #include <err.h> 41 #include <errno.h> 42 #include <getopt.h> 43 #include <limits.h> 44 #include <regex.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <unistd.h> 49 50 #include "grep.h" 51 52 /* 53 * Upper bound of number of digits to represent an int in decimal 54 * 2^8n <= 10^3n. Allow a terminator. 55 */ 56 #define MAX_BUF_DIGITS (sizeof(int) * 3) + 1 57 58 /* Flags passed to regcomp() and regexec() */ 59 int cflags = REG_BASIC; 60 int eflags = REG_STARTEND; 61 62 int matchall; /* shortcut */ 63 int patterns, pattern_sz; 64 char **pattern; 65 regex_t *r_pattern; 66 67 /* For regex errors */ 68 char re_error[RE_ERROR_BUF + 1]; 69 70 /* Command-line flags */ 71 int Aflag; /* -A x: print x lines trailing each match */ 72 int Bflag; /* -B x: print x lines leading each match */ 73 int Eflag; /* -E: interpret pattern as extended regexp */ 74 int Fflag; /* -F: interpret pattern as list of fixed strings */ 75 int Gflag; /* -G: interpret pattern as basic regexp */ 76 int Hflag; /* -H: Always print filenames */ 77 int Lflag; /* -L: only show names of files with no matches */ 78 /*int Pflag; *//* -P: if -r, no symlinks are followed */ 79 /*int Sflag; *//* -S: if -r, follow all symlinks */ 80 int bflag; /* -b: show block numbers for each match */ 81 int cflag; /* -c: only show a count of matching lines */ 82 int hflag; /* -h: Never print filenames. -H overrides */ 83 int lflag; /* -l: only show names of files with matches */ 84 int mflag; /* -m: specify maximum line matches (per file) */ 85 int nflag; /* -n: show line numbers in front of matching lines */ 86 int oflag; /* -o: only print out matches */ 87 int qflag; /* -q: quiet mode (don't output anything) */ 88 int sflag; /* -s: silent mode (ignore errors) */ 89 int vflag; /* -v: only show non-matching lines */ 90 int wflag; /* -w: pattern must start and end on word boundaries */ 91 int xflag; /* -x: pattern must match entire line */ 92 int lbflag; /* --line-buffered */ 93 94 int colours = 0; /* Attempt to use terminal colours */ 95 const char *grep_colour = "01;32"; /* Default colour string, green */ 96 char *uc; 97 98 /* Characters to print after filenames */ 99 char fn_endchar = '\n'; 100 char fn_colonchar = ':'; 101 char fn_dashchar = '-'; 102 char line_endchar = '\n'; /* End of line character */ 103 104 int maxcount = 0; /* Maximum line matches per file */ 105 int output_filenames = 0; 106 107 /* Argv[0] flags */ 108 int zgrep; /* If we are invoked as zgrep */ 109 110 int binbehave = BIN_FILE_BIN; 111 int dirbehave = GREP_READ; 112 int devbehave = GREP_READ; 113 /*int linkbehave = LINK_FOLLOW;*/ 114 const char *stdin_label; 115 116 enum { 117 BIN_OPT = CHAR_MAX + 1, 118 HELP_OPT, 119 LABEL_OPT, 120 MMAP_OPT, 121 LINK_OPT, 122 COLOUR_OPT, 123 LINEBUF_OPT 124 }; 125 126 /* Housekeeping */ 127 int first; /* flag whether or not this is our first match */ 128 int tail; /* lines left to print */ 129 130 static void 131 usage(void) 132 { 133 fprintf(stderr, "usage: %s %s %s\n", 134 getprogname(), 135 "[-[ABC] num] [-EFGHILVZabcdhilnoqrsvwxz]", 136 "[-D action] [-d action] [-e pattern] [-f file]"); 137 exit(2); 138 } 139 140 static const char *optstr = "0123456789A:B:C:D:EFGHILUVZabcd:e:f:hilm:noqrsuvwxyz"; 141 142 struct option long_options[] = 143 { 144 {"binary-files", required_argument, NULL, BIN_OPT}, 145 {"help", no_argument, NULL, HELP_OPT}, 146 {"label", required_argument, NULL, LABEL_OPT}, 147 {"mmap", no_argument, NULL, MMAP_OPT}, 148 {"line-buffered", no_argument, NULL, LINEBUF_OPT}, 149 /* {"links", required_argument, NULL, LINK_OPT},*/ 150 {"after-context", required_argument, NULL, 'A'}, 151 {"before-context", required_argument, NULL, 'B'}, 152 {"color", optional_argument, NULL, COLOUR_OPT}, 153 {"colour", optional_argument, NULL, COLOUR_OPT}, 154 {"context", optional_argument, NULL, 'C'}, 155 {"devices", required_argument, NULL, 'D'}, 156 {"extended-regexp", no_argument, NULL, 'E'}, 157 {"fixed-strings", no_argument, NULL, 'F'}, 158 {"fixed-regexp", no_argument, NULL, 'F'}, 159 {"basic-regexp", no_argument, NULL, 'G'}, 160 {"with-filename", no_argument, NULL, 'H'}, 161 {"files-without-match", no_argument, NULL, 'L'}, 162 {"binary", no_argument, NULL, 'U'}, 163 {"version", no_argument, NULL, 'V'}, 164 {"null", no_argument, NULL, 'Z'}, 165 {"text", no_argument, NULL, 'a'}, 166 {"byte-offset", no_argument, NULL, 'b'}, 167 {"count", no_argument, NULL, 'c'}, 168 {"directories", required_argument, NULL, 'd'}, 169 {"regexp", required_argument, NULL, 'e'}, 170 {"file", required_argument, NULL, 'f'}, 171 {"no-filename", no_argument, NULL, 'h'}, 172 {"ignore-case", no_argument, NULL, 'i'}, 173 {"files-with-matches", no_argument, NULL, 'l'}, 174 {"max-count", required_argument, NULL, 'm'}, 175 {"line-number", no_argument, NULL, 'n'}, 176 {"only-matching", no_argument, NULL, 'o'}, 177 {"quiet", no_argument, NULL, 'q'}, 178 {"silent", no_argument, NULL, 'q'}, 179 {"recursive", no_argument, NULL, 'r'}, 180 {"no-messages", no_argument, NULL, 's'}, 181 {"unix-byte-offsets", no_argument, NULL, 'u'}, 182 {"invert-match", no_argument, NULL, 'v'}, 183 {"word-regexp", no_argument, NULL, 'w'}, 184 {"line-regexp", no_argument, NULL, 'x'}, 185 {"null-data", no_argument, NULL, 'z'}, 186 187 {NULL, no_argument, NULL, 0} 188 }; 189 190 static void 191 add_pattern(char *pat, size_t len) 192 { 193 if (len == 0 || matchall) { 194 matchall = 1; 195 return; 196 } 197 if (patterns == pattern_sz) { 198 pattern_sz *= 2; 199 pattern = grep_realloc(pattern, ++pattern_sz * sizeof(*pattern)); 200 } 201 if (pat[len - 1] == '\n') 202 --len; 203 pattern[patterns] = grep_malloc(len + 1); 204 strncpy(pattern[patterns], pat, len); 205 pattern[patterns][len] = '\0'; 206 ++patterns; 207 } 208 209 static void 210 read_patterns(char *fn) 211 { 212 FILE *f; 213 char *line; 214 size_t len; 215 int nl; 216 217 if ((f = fopen(fn, "r")) == NULL) 218 err(2, "%s", fn); 219 nl = 0; 220 while ((line = fgetln(f, &len)) != NULL) { 221 if (*line == '\n') { 222 ++nl; 223 continue; 224 } 225 if (nl) { 226 matchall = 1; 227 break; 228 } 229 nl = 0; 230 add_pattern(line, len); 231 } 232 if (ferror(f)) 233 err(2, "%s", fn); 234 fclose(f); 235 } 236 237 static int 238 check_context_arg(char const *str) { 239 char *ep; 240 long lval; 241 242 errno = 0; 243 lval = strtol(str, &ep, 10); 244 245 if (str[0] == '\0' || *ep != '\0') 246 errx(2, "Invalid context argument"); 247 248 if ((errno == ERANGE && (lval == LONG_MAX || lval == LONG_MIN)) || 249 (lval > INT_MAX || lval < INT_MIN)) 250 errx(2, "Context argument out of range"); 251 252 return lval; 253 254 } 255 256 static int 257 grep_getopt(int argc, char *const *argv) 258 { 259 size_t ptr; 260 int c; 261 char buffer[MAX_BUF_DIGITS]; 262 263 ptr = 0; 264 while (c = getopt_long(argc, argv, optstr, long_options, 265 (int *)NULL), '0' <= c && 266 c <= '9' && ptr < MAX_BUF_DIGITS) { 267 268 /* Avoid leading zeros */ 269 if (ptr != 0 || (ptr == 0 && c != '0')) 270 buffer[ptr++] = c; 271 } 272 273 if (ptr >= MAX_BUF_DIGITS) 274 errx(2, "Context argument out of range"); 275 276 if (ptr != 0) { 277 buffer[ptr] = '\0'; /* We now have a string of digits */ 278 Aflag = Bflag = check_context_arg(buffer); 279 } 280 281 return c; 282 } 283 284 int 285 main(int argc, char *argv[]) 286 { 287 const char *progname; 288 int c, i; 289 struct stat sb; 290 291 stdin_label = "(standard input)"; 292 293 progname = getprogname(); 294 switch (progname[0]) { 295 case 'e': 296 Eflag++; 297 break; 298 case 'f': 299 Fflag++; 300 break; 301 case 'g': 302 Gflag++; 303 break; 304 case 'z': 305 zgrep++; 306 switch (progname[1]) { 307 case 'e': 308 Eflag++; 309 break; 310 case 'f': 311 Fflag++; 312 break; 313 case 'g': 314 Gflag++; 315 break; 316 } 317 break; 318 } 319 320 while ((c = grep_getopt(argc, argv)) != -1) { 321 322 switch (c) { 323 324 case 'A': 325 Aflag = check_context_arg(optarg); 326 break; 327 case 'B': 328 Bflag = check_context_arg(optarg); 329 break; 330 case 'C': 331 if (optarg == NULL) 332 Aflag = Bflag = 2; 333 else 334 Aflag = Bflag = check_context_arg(optarg); 335 break; 336 case 'D': 337 if (strcmp("read", optarg) == 0) 338 devbehave = GREP_READ; 339 else if (strcmp("skip", optarg) == 0) 340 devbehave = GREP_SKIP; 341 else { 342 errx(2, "Unknown device option"); 343 } 344 break; 345 346 case 'E': 347 Fflag = Gflag = 0; 348 Eflag++; 349 break; 350 case 'F': 351 Eflag = Gflag = 0; 352 Fflag++; 353 break; 354 case 'G': 355 Eflag = Fflag = 0; 356 Gflag++; 357 break; 358 case 'H': 359 Hflag = 1; 360 break; 361 case 'I': 362 binbehave = BIN_FILE_SKIP; 363 break; 364 case 'L': 365 lflag = 0; 366 Lflag = qflag = 1; 367 break; 368 /* case 'P': 369 linkbehave = LINK_SKIP; 370 break; 371 case 'S': 372 linkbehave = LINK_FOLLOW; 373 break;*/ 374 case 'R': 375 case 'r': 376 dirbehave = GREP_RECURSE; 377 break; 378 case 'U': 379 case 'u': 380 /* these are here for compatability */ 381 break; 382 case 'V': 383 fprintf(stdout, "grep version %s\n", VERSION); 384 exit(0); 385 break; 386 case 'Z': 387 fn_colonchar = fn_endchar = fn_dashchar = 0; 388 break; 389 case 'a': 390 binbehave = BIN_FILE_TEXT; 391 break; 392 case 'b': 393 bflag = 1; 394 break; 395 case 'c': 396 cflag = 1; 397 break; 398 case 'd': 399 if (strcmp("read", optarg) == 0) 400 dirbehave = GREP_READ; 401 else if (strcmp("skip", optarg) == 0) 402 dirbehave = GREP_SKIP; 403 else if (strcmp("recurse", optarg) == 0) 404 dirbehave = GREP_RECURSE; 405 else { 406 errx(2, "Unknown directory option\n"); 407 } 408 break; 409 410 case 'e': 411 add_pattern(optarg, strlen(optarg)); 412 break; 413 case 'f': 414 read_patterns(optarg); 415 break; 416 case 'h': 417 hflag = 1; 418 break; 419 case 'i': 420 case 'y': 421 cflags |= REG_ICASE; 422 break; 423 case 'l': 424 Lflag = 0; 425 lflag = qflag = 1; 426 break; 427 case 'm': 428 mflag = 1; 429 maxcount = strtol(optarg, (char **)NULL, 10); 430 break; 431 case 'n': 432 nflag = 1; 433 break; 434 case 'o': 435 oflag = 1; 436 break; 437 case 'q': 438 qflag = 1; 439 break; 440 case 's': 441 sflag = 1; 442 break; 443 case 'v': 444 vflag = 1; 445 break; 446 case 'w': 447 wflag = 1; 448 break; 449 case 'x': 450 xflag = 1; 451 break; 452 case 'z': 453 line_endchar = 0; 454 break; 455 case BIN_OPT: 456 if (strcmp("binary", optarg) == 0) 457 binbehave = BIN_FILE_BIN; 458 else if (strcmp("without-match", optarg) == 0) 459 binbehave = BIN_FILE_SKIP; 460 else if (strcmp("text", optarg) == 0) 461 binbehave = BIN_FILE_TEXT; 462 else { 463 errx(2, "Unknown binary-files option\n"); 464 } 465 break; 466 467 case COLOUR_OPT: 468 if (optarg == NULL || strcmp("auto", optarg) == 0 || 469 strcmp("tty", optarg) == 0 || 470 strcmp("if-tty", optarg) == 0) { 471 472 /* Check that stdout is a terminal */ 473 if (isatty(STDOUT_FILENO) && 474 getenv("TERM") && 475 strcmp(getenv("TERM"), "dumb") != 0) 476 colours = 1; 477 else 478 colours = 0; 479 480 } else if (strcmp("always", optarg) == 0 || 481 strcmp("yes", optarg) == 0 || 482 strcmp("force", optarg) == 0) 483 colours = 1; 484 else if (strcmp("never", optarg) == 0 || 485 strcmp("no", optarg) == 0 || 486 strcmp("none", optarg) == 0) 487 colours = 0; 488 else 489 errx(2, "Unknown color option\n"); 490 491 uc = getenv("GREP_COLOR"); 492 if (colours == 1 && uc != NULL && *uc != '\0') 493 grep_colour = uc; 494 break; 495 case LABEL_OPT: 496 stdin_label = optarg; 497 break; 498 case MMAP_OPT: 499 break; 500 /* 501 * case LINK_OPT: 502 * if (strcmp("explicit", optarg) == 0) 503 * linkbehave = LINK_EXPLICIT; 504 * else if (strcmp("follow", optarg) == 0) 505 * linkbehave = LINK_FOLLOW; 506 * else if (strcmp("skip", optarg) == 0) 507 * linkbehave = LINK_SKIP; 508 * else { 509 * errx(2, "Unknown links option\n"); 510 * } 511 * break; 512 */ 513 case LINEBUF_OPT: 514 lbflag = 1; 515 break; 516 517 case HELP_OPT: 518 default: 519 usage(); 520 } 521 522 } 523 524 argc -= optind; 525 argv += optind; 526 527 if (argc == 0 && patterns == 0) 528 usage(); 529 if (patterns == 0) { 530 add_pattern(*argv, strlen(*argv)); 531 --argc; 532 ++argv; 533 } 534 535 if (Eflag) 536 cflags |= REG_EXTENDED; 537 else if (Fflag) 538 cflags |= REG_NOSPEC; 539 r_pattern = grep_malloc(patterns * sizeof(*r_pattern)); 540 for (i = 0; i < patterns; ++i) { 541 if ((c = regcomp(&r_pattern[i], pattern[i], cflags))) { 542 regerror(c, &r_pattern[i], re_error, RE_ERROR_BUF); 543 errx(2, "%s", re_error); 544 } 545 } 546 547 if ((argc > 1 && !hflag) || Hflag) 548 output_filenames = 1; 549 550 if (argc == 1 && !hflag && dirbehave == GREP_RECURSE) 551 if (!stat(*argv, &sb) && (sb.st_mode & S_IFMT) == S_IFDIR) 552 output_filenames = 1; 553 554 if (argc == 0) 555 exit(!procfile(NULL)); 556 557 if (lbflag) 558 setlinebuf(stdout); 559 560 if (dirbehave == GREP_RECURSE) 561 c = grep_tree(argv); 562 else 563 for (c = 0; argc--; ++argv) 564 c += procfile(*argv); 565 566 exit(!c); 567 } 568