1 /* grep.c - main driver file for grep. 2 Copyright (C) 1992, 1997-2002, 2004-2015 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 17 02110-1301, USA. */ 18 19 /* Written July 1992 by Mike Haertel. */ 20 21 #include <config.h> 22 #include <sys/types.h> 23 #include <sys/stat.h> 24 #include <wchar.h> 25 #include <wctype.h> 26 #include <fcntl.h> 27 #include <inttypes.h> 28 #include <stdio.h> 29 #include "system.h" 30 31 #include "argmatch.h" 32 #include "c-ctype.h" 33 #include "closeout.h" 34 #include "colorize.h" 35 #include "error.h" 36 #include "exclude.h" 37 #include "exitfail.h" 38 #include "fcntl-safer.h" 39 #include "fts_.h" 40 #include "getopt.h" 41 #include "grep.h" 42 #include "intprops.h" 43 #include "progname.h" 44 #include "propername.h" 45 #include "quote.h" 46 #include "safe-read.h" 47 #include "search.h" 48 #include "version-etc.h" 49 #include "xalloc.h" 50 #include "xstrtol.h" 51 52 #define SEP_CHAR_SELECTED ':' 53 #define SEP_CHAR_REJECTED '-' 54 #define SEP_STR_GROUP "--" 55 56 #define AUTHORS \ 57 proper_name ("Mike Haertel"), \ 58 _("others, see <http://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>") 59 60 /* When stdout is connected to a regular file, save its stat 61 information here, so that we can automatically skip it, thus 62 avoiding a potential (racy) infinite loop. 
*/ 63 static struct stat out_stat; 64 65 /* if non-zero, display usage information and exit */ 66 static int show_help; 67 68 /* Print the version on standard output and exit. */ 69 static bool show_version; 70 71 /* Suppress diagnostics for nonexistent or unreadable files. */ 72 static bool suppress_errors; 73 74 /* If nonzero, use color markers. */ 75 static int color_option; 76 77 /* Show only the part of a line matching the expression. */ 78 static bool only_matching; 79 80 /* If nonzero, make sure first content char in a line is on a tab stop. */ 81 static bool align_tabs; 82 83 #if HAVE_ASAN 84 /* Record the starting address and length of the sole poisoned region, 85 so that we can unpoison it later, just before each following read. */ 86 static void const *poison_buf; 87 static size_t poison_len; 88 89 static void 90 clear_asan_poison (void) 91 { 92 if (poison_buf) 93 __asan_unpoison_memory_region (poison_buf, poison_len); 94 } 95 96 static void 97 asan_poison (void const *addr, size_t size) 98 { 99 poison_buf = addr; 100 poison_len = size; 101 102 __asan_poison_memory_region (poison_buf, poison_len); 103 } 104 #else 105 static void clear_asan_poison (void) { } 106 static void asan_poison (void const volatile *addr, size_t size) { } 107 #endif 108 109 /* The group separator used when context is requested. */ 110 static const char *group_separator = SEP_STR_GROUP; 111 112 /* The context and logic for choosing default --color screen attributes 113 (foreground and background colors, etc.) are the following. 114 -- There are eight basic colors available, each with its own 115 nominal luminosity to the human eye and foreground/background 116 codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41], 117 magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46], 118 yellow [89 %, 33/43], and white [100 %, 37/47]). 
      -- Sometimes, white as a background is actually implemented using
         a shade of light gray, so that a foreground white can be visible
         on top of it (but most often not).
      -- Sometimes, black as a foreground is actually implemented using
         a shade of dark gray, so that it can be visible on top of a
         background black (but most often not).
      -- Sometimes, more colors are available, as extensions.
      -- Other attributes can be selected/deselected (bold [1/22],
         underline [4/24], standout/inverse [7/27], blink [5/25], and
         invisible/hidden [8/28]).  They are sometimes implemented by
         using colors instead of what their names imply; e.g., bold is
         often achieved by using brighter colors.  In practice, only bold
         is really available to us, underline sometimes being mapped by
         the terminal to some strange color choice, and standout best
         being left for use by downstream programs such as less(1).
      -- We cannot assume that any of the extensions or special features
         are available for the purpose of choosing defaults for everyone.
      -- The most prevalent default terminal backgrounds are pure black
         and pure white, and are not necessarily the same shades of
         those as if they were selected explicitly with SGR sequences.
         Some terminals use dark or light pictures as default background,
         but those are covered over by an explicit selection of background
         color with an SGR sequence; their users will appreciate their
         background pictures not being covered like this, if possible.
      -- Some uses of color attributes are meant to make some output items
         more understated (e.g., context lines); this cannot be achieved
         by changing the background color.
      -- For these reasons, the grep color defaults should strive not
         to change the background color from its default, unless it's
         for a short item that should be highlighted, not understated.
      -- The grep foreground color defaults (without an explicitly set
         background) should provide enough contrast to be readable on any
         terminal with either a black (dark) or white (light) background.
         This only leaves red, magenta, green, and cyan (and their bold
         counterparts) and possibly bold blue.  */
/* The color strings used for matched text.
   The user can override them using the deprecated
   environment variable GREP_COLOR or the new GREP_COLORS.
   Each string is the parameter part of an SGR sequence.  */
static const char *selected_match_color = "01;31";	/* bold red */
static const char *context_match_color  = "01;31";	/* bold red */

/* Other colors.  Defaults look damn good.
   An empty string means "emit no SGR sequence at all".  */
static const char *filename_color = "35";	/* magenta */
static const char *line_num_color = "32";	/* green */
static const char *byte_num_color = "32";	/* green */
static const char *sep_color      = "36";	/* cyan */
static const char *selected_line_color = "";	/* default color pair */
static const char *context_line_color  = "";	/* default color pair */

/* Select Graphic Rendition (SGR, "\33[...m") strings.  */
/* Also Erase in Line (EL) to Right ("\33[K") by default.  */
/*    Why have EL to Right after SGR?
         -- The behavior of line-wrapping when at the bottom of the
            terminal screen and at the end of the current line is often
            such that a new line is introduced, entirely cleared with
            the current background color which may be different from the
            default one (see the boolean back_color_erase terminfo(5)
            capability), thus scrolling the display by one line.
            The end of this new line will stay in this background color
            even after reverting to the default background color with
            "\33[m", unless it is explicitly cleared again with "\33[K"
            (which is the behavior the user would instinctively expect
            from the whole thing).
            There may be some unavoidable
            background-color flicker at the end of this new line because
            of this (when timing with the monitor's redraw is just right).
         -- The behavior of HT (tab, "\t") is usually the same as that of
            Cursor Forward Tabulation (CHT) with a default parameter
            of 1 ("\33[I"), i.e., it performs pure movement to the next
            tab stop, without any clearing of either content or screen
            attributes (including background color); try
               printf 'asdfqwerzxcv\rASDF\tZXCV\n'
            in a bash(1) shell to demonstrate this.  This is not what the
            user would instinctively expect of HT (but is ok for CHT).
            The instinctive behavior would include clearing the terminal
            cells that are skipped over by HT with blank cells in the
            current screen attributes, including background color;
            the boolean dest_tabs_magic_smso terminfo(5) capability
            indicates this saner behavior for HT, but only some rare
            terminals have it (although it also indicates a special
            glitch with standout mode in the Teleray terminal for which
            it was initially introduced).  The remedy is to add "\33[K"
            after each SGR sequence, be it START (to fix the behavior
            of any HT after that before another SGR) or END (to fix the
            behavior of an HT in default background color that would
            follow a line-wrapping at the bottom of the screen in another
            background color, and to complement doing it after START).
            Piping grep's output through a pager such as less(1) avoids
            any HT problems since the pager performs tab expansion.

      Generic disadvantages of this remedy are:
         -- Some very rare terminals might support SGR but not EL (nobody
            will use "grep --color" on a terminal that does not support
            SGR in the first place).
         -- Having these extra control sequences might somewhat complicate
            the task of any program trying to parse "grep --color"
            output in order to extract structuring information from it.
215 A specific disadvantage to doing it after SGR START is: 216 -- Even more possible background color flicker (when timing 217 with the monitor's redraw is just right), even when not at the 218 bottom of the screen. 219 There are no additional disadvantages specific to doing it after 220 SGR END. 221 222 It would be impractical for GNU grep to become a full-fledged 223 terminal program linked against ncurses or the like, so it will 224 not detect terminfo(5) capabilities. */ 225 static const char *sgr_start = "\33[%sm\33[K"; 226 static const char *sgr_end = "\33[m\33[K"; 227 228 /* SGR utility functions. */ 229 static void 230 pr_sgr_start (char const *s) 231 { 232 if (*s) 233 print_start_colorize (sgr_start, s); 234 } 235 static void 236 pr_sgr_end (char const *s) 237 { 238 if (*s) 239 print_end_colorize (sgr_end); 240 } 241 static void 242 pr_sgr_start_if (char const *s) 243 { 244 if (color_option) 245 pr_sgr_start (s); 246 } 247 static void 248 pr_sgr_end_if (char const *s) 249 { 250 if (color_option) 251 pr_sgr_end (s); 252 } 253 254 struct color_cap 255 { 256 const char *name; 257 const char **var; 258 void (*fct) (void); 259 }; 260 261 static void 262 color_cap_mt_fct (void) 263 { 264 /* Our caller just set selected_match_color. */ 265 context_match_color = selected_match_color; 266 } 267 268 static void 269 color_cap_rv_fct (void) 270 { 271 /* By this point, it was 1 (or already -1). */ 272 color_option = -1; /* That's still != 0. */ 273 } 274 275 static void 276 color_cap_ne_fct (void) 277 { 278 sgr_start = "\33[%sm"; 279 sgr_end = "\33[m"; 280 } 281 282 /* For GREP_COLORS. 
*/
/* Table of recognized capability names.  Each entry names the variable
   that receives the capability's value (NULL if it takes none) and an
   optional hook to run.  The table ends with an all-NULL entry.  */
static const struct color_cap color_dict[] =
  {
    { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
    { "ms", &selected_match_color, NULL }, /* selected matched text */
    { "mc", &context_match_color,  NULL }, /* context matched text */
    { "fn", &filename_color,       NULL }, /* filename */
    { "ln", &line_num_color,       NULL }, /* line number */
    { "bn", &byte_num_color,       NULL }, /* byte (sic) offset */
    { "se", &sep_color,            NULL }, /* separator */
    { "sl", &selected_line_color,  NULL }, /* selected lines */
    { "cx", &context_line_color,   NULL }, /* context lines */
    { "rv", NULL,                  color_cap_rv_fct }, /* -v reverses sl/cx */
    { "ne", NULL,                  color_cap_ne_fct }, /* no EL on SGR_* */
    { NULL, NULL,                  NULL }
  };

/* Exclusion patterns for ordinary files and for directories; either may
   be NULL, in which case skipped_file does not consult it.  */
static struct exclude *excluded_patterns;
static struct exclude *excluded_directory_patterns;
/* Short options.  */
static char const short_options[] =
"0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz";

/* Non-boolean long options that have no corresponding short equivalents.
   Values start above CHAR_MAX so they cannot collide with any
   single-character option code.  */
enum
{
  BINARY_FILES_OPTION = CHAR_MAX + 1,
  COLOR_OPTION,
  EXCLUDE_DIRECTORY_OPTION,
  EXCLUDE_OPTION,
  EXCLUDE_FROM_OPTION,
  GROUP_SEPARATOR_OPTION,
  INCLUDE_OPTION,
  LINE_BUFFERED_OPTION,
  LABEL_OPTION
};

/* Long options equivalences.
*/
/* Each entry maps a long option name to the code of its short
   equivalent or to one of the *_OPTION codes above.  "help" is the
   one exception: it has a flag pointer, so it stores 1 in show_help.  */
static struct option const long_options[] =
{
  {"basic-regexp", no_argument, NULL, 'G'},
  {"extended-regexp", no_argument, NULL, 'E'},
  {"fixed-regexp", no_argument, NULL, 'F'},
  {"fixed-strings", no_argument, NULL, 'F'},
  {"perl-regexp", no_argument, NULL, 'P'},
  {"after-context", required_argument, NULL, 'A'},
  {"before-context", required_argument, NULL, 'B'},
  {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
  {"byte-offset", no_argument, NULL, 'b'},
  {"context", required_argument, NULL, 'C'},
  {"color", optional_argument, NULL, COLOR_OPTION},
  {"colour", optional_argument, NULL, COLOR_OPTION},
  {"count", no_argument, NULL, 'c'},
  {"devices", required_argument, NULL, 'D'},
  {"directories", required_argument, NULL, 'd'},
  {"exclude", required_argument, NULL, EXCLUDE_OPTION},
  {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
  {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
  {"file", required_argument, NULL, 'f'},
  {"files-with-matches", no_argument, NULL, 'l'},
  {"files-without-match", no_argument, NULL, 'L'},
  {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
  {"help", no_argument, &show_help, 1},
  {"include", required_argument, NULL, INCLUDE_OPTION},
  {"ignore-case", no_argument, NULL, 'i'},
  {"initial-tab", no_argument, NULL, 'T'},
  {"label", required_argument, NULL, LABEL_OPTION},
  {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
  {"line-number", no_argument, NULL, 'n'},
  {"line-regexp", no_argument, NULL, 'x'},
  {"max-count", required_argument, NULL, 'm'},

  {"no-filename", no_argument, NULL, 'h'},
  /* Shares a code with --group-separator but takes no argument.  */
  {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
  {"no-messages", no_argument, NULL, 's'},
  {"null", no_argument, NULL, 'Z'},
  {"null-data", no_argument, NULL, 'z'},
  {"only-matching", no_argument, NULL, 'o'},
  {"quiet", no_argument, NULL, 'q'},
  {"recursive", no_argument, NULL, 'r'},
  {"dereference-recursive", no_argument, NULL, 'R'},
  {"regexp", required_argument, NULL, 'e'},
  {"invert-match", no_argument, NULL, 'v'},
  {"silent", no_argument, NULL, 'q'},
  {"text", no_argument, NULL, 'a'},
  {"binary", no_argument, NULL, 'U'},
  {"unix-byte-offsets", no_argument, NULL, 'u'},
  {"version", no_argument, NULL, 'V'},
  {"with-filename", no_argument, NULL, 'H'},
  {"word-regexp", no_argument, NULL, 'w'},
  {0, 0, 0, 0}
};

/* Define flags declared in grep.h. */
bool match_icase;
bool match_words;
bool match_lines;
char eolbyte;			/* Byte that terminates an input line.  */
enum textbin input_textbin;

static char const *matcher;

/* For error messages. */
/* The input file name, or (if standard input) "-" or a --label argument.  */
static char const *filename;
/* Omit leading "./" from file names in diagnostics.  */
static bool omit_dot_slash;
/* Set by suppressible_error whenever an error is reported or suppressed.  */
static bool errseen;
/* If set, clean_up_stdout does not close standard output.  */
static bool write_error_seen;

enum directories_type
  {
    READ_DIRECTORIES = 2,
    RECURSE_DIRECTORIES,
    SKIP_DIRECTORIES
  };

/* How to handle directories.  The two arrays are parallel:
   directories_args[i] is the spelling of directories_types[i].  */
static char const *const directories_args[] =
{
  "read", "recurse", "skip", NULL
};
static enum directories_type const directories_types[] =
{
  READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES
};
ARGMATCH_VERIFY (directories_args, directories_types);

static enum directories_type directories = READ_DIRECTORIES;

/* FTS flags that are always used, and the initial full set.  */
enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK };
static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL;

/* How to handle devices.
*/ 416 static enum 417 { 418 READ_COMMAND_LINE_DEVICES, 419 READ_DEVICES, 420 SKIP_DEVICES 421 } devices = READ_COMMAND_LINE_DEVICES; 422 423 static bool grepfile (int, char const *, bool, bool); 424 static bool grepdesc (int, bool); 425 426 static void dos_binary (void); 427 static void dos_unix_byte_offsets (void); 428 static size_t undossify_input (char *, size_t); 429 430 static bool 431 is_device_mode (mode_t m) 432 { 433 return S_ISCHR (m) || S_ISBLK (m) || S_ISSOCK (m) || S_ISFIFO (m); 434 } 435 436 static bool 437 skip_devices (bool command_line) 438 { 439 return (devices == SKIP_DEVICES 440 || (devices == READ_COMMAND_LINE_DEVICES && !command_line)); 441 } 442 443 /* Return if ST->st_size is defined. Assume the file is not a 444 symbolic link. */ 445 static bool 446 usable_st_size (struct stat const *st) 447 { 448 return S_ISREG (st->st_mode) || S_TYPEISSHM (st) || S_TYPEISTMO (st); 449 } 450 451 /* Lame substitutes for SEEK_DATA and SEEK_HOLE on platforms lacking them. 452 Do not rely on these finding data or holes if they equal SEEK_SET. */ 453 #ifndef SEEK_DATA 454 enum { SEEK_DATA = SEEK_SET }; 455 #endif 456 #ifndef SEEK_HOLE 457 enum { SEEK_HOLE = SEEK_SET }; 458 #endif 459 460 /* Functions we'll use to search. */ 461 typedef void (*compile_fp_t) (char const *, size_t); 462 typedef size_t (*execute_fp_t) (char const *, size_t, size_t *, char const *); 463 static compile_fp_t compile; 464 static execute_fp_t execute; 465 466 /* Like error, but suppress the diagnostic if requested. */ 467 static void 468 suppressible_error (char const *mesg, int errnum) 469 { 470 if (! suppress_errors) 471 error (0, errnum, "%s", mesg); 472 errseen = true; 473 } 474 475 /* If there has already been a write error, don't bother closing 476 standard output, as that might elicit a duplicate diagnostic. */ 477 static void 478 clean_up_stdout (void) 479 { 480 if (! 
write_error_seen) 481 close_stdout (); 482 } 483 484 static bool 485 textbin_is_binary (enum textbin textbin) 486 { 487 return textbin < TEXTBIN_UNKNOWN; 488 } 489 490 /* The high-order bit of a byte. */ 491 enum { HIBYTE = 0x80 }; 492 493 /* True if every byte with HIBYTE off is a single-byte character. 494 UTF-8 has this property. */ 495 static bool easy_encoding; 496 497 static void 498 init_easy_encoding (void) 499 { 500 easy_encoding = true; 501 for (int i = 0; i < HIBYTE; i++) 502 easy_encoding &= mbclen_cache[i] == 1; 503 } 504 505 /* A cast to TYPE of VAL. Use this when TYPE is a pointer type, VAL 506 is properly aligned for TYPE, and 'gcc -Wcast-align' cannot infer 507 the alignment and would otherwise complain about the cast. */ 508 #if 4 < __GNUC__ + (6 <= __GNUC_MINOR__) 509 # define CAST_ALIGNED(type, val) \ 510 ({ __typeof__ (val) val_ = val; \ 511 _Pragma ("GCC diagnostic push") \ 512 _Pragma ("GCC diagnostic ignored \"-Wcast-align\"") \ 513 (type) val_; \ 514 _Pragma ("GCC diagnostic pop") \ 515 }) 516 #else 517 # define CAST_ALIGNED(type, val) ((type) (val)) 518 #endif 519 520 /* An unsigned type suitable for fast matching. */ 521 typedef uintmax_t uword; 522 523 /* Skip the easy bytes in a buffer that is guaranteed to have a sentinel 524 that is not easy, and return a pointer to the first non-easy byte. 525 In easy encodings, the easy bytes all have HIBYTE off. 526 In other encodings, no byte is easy. */ 527 static char const * _GL_ATTRIBUTE_PURE 528 skip_easy_bytes (char const *buf) 529 { 530 if (!easy_encoding) 531 return buf; 532 533 uword uword_max = -1; 534 535 /* 0x8080..., extended to be wide enough for uword. */ 536 uword hibyte_mask = uword_max / UCHAR_MAX * HIBYTE; 537 538 /* Search a byte at a time until the pointer is aligned, then a 539 uword at a time until a match is found, then a byte at a time to 540 identify the exact byte. The uword search may go slightly past 541 the buffer end, but that's benign. 
*/ 542 char const *p; 543 uword const *s; 544 for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++) 545 if (*p & HIBYTE) 546 return p; 547 for (s = CAST_ALIGNED (uword const *, p); ! (*s & hibyte_mask); s++) 548 continue; 549 for (p = (char const *) s; ! (*p & HIBYTE); p++) 550 continue; 551 return p; 552 } 553 554 /* Return the text type of data in BUF, of size SIZE. 555 BUF must be followed by at least sizeof (uword) bytes, 556 which may be arbitrarily written to or read from. */ 557 static enum textbin 558 buffer_textbin (char *buf, size_t size) 559 { 560 if (eolbyte && memchr (buf, '\0', size)) 561 return TEXTBIN_BINARY; 562 563 if (1 < MB_CUR_MAX) 564 { 565 mbstate_t mbs = { 0 }; 566 size_t clen; 567 char const *p; 568 569 buf[size] = -1; 570 for (p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen) 571 { 572 clen = mbrlen (p, buf + size - p, &mbs); 573 if ((size_t) -2 <= clen) 574 return clen == (size_t) -2 ? TEXTBIN_UNKNOWN : TEXTBIN_BINARY; 575 } 576 } 577 578 return TEXTBIN_TEXT; 579 } 580 581 /* Return the text type of a file. BUF, of size SIZE, is the initial 582 buffer read from the file with descriptor FD and status ST. 583 BUF must be followed by at least sizeof (uword) bytes, 584 which may be arbitrarily written to or read from. */ 585 static enum textbin 586 file_textbin (char *buf, size_t size, int fd, struct stat const *st) 587 { 588 enum textbin textbin = buffer_textbin (buf, size); 589 if (textbin_is_binary (textbin)) 590 return textbin; 591 592 if (usable_st_size (st)) 593 { 594 if (st->st_size <= size) 595 return textbin == TEXTBIN_UNKNOWN ? TEXTBIN_BINARY : textbin; 596 597 /* If the file has holes, it must contain a null byte somewhere. */ 598 if (SEEK_HOLE != SEEK_SET && eolbyte) 599 { 600 off_t cur = size; 601 if (O_BINARY || fd == STDIN_FILENO) 602 { 603 cur = lseek (fd, 0, SEEK_CUR); 604 if (cur < 0) 605 return TEXTBIN_UNKNOWN; 606 } 607 608 /* Look for a hole after the current location. 
*/ 609 off_t hole_start = lseek (fd, cur, SEEK_HOLE); 610 if (0 <= hole_start) 611 { 612 if (lseek (fd, cur, SEEK_SET) < 0) 613 suppressible_error (filename, errno); 614 if (hole_start < st->st_size) 615 return TEXTBIN_BINARY; 616 } 617 } 618 } 619 620 return TEXTBIN_UNKNOWN; 621 } 622 623 /* Convert STR to a nonnegative integer, storing the result in *OUT. 624 STR must be a valid context length argument; report an error if it 625 isn't. Silently ceiling *OUT at the maximum value, as that is 626 practically equivalent to infinity for grep's purposes. */ 627 static void 628 context_length_arg (char const *str, intmax_t *out) 629 { 630 switch (xstrtoimax (str, 0, 10, out, "")) 631 { 632 case LONGINT_OK: 633 case LONGINT_OVERFLOW: 634 if (0 <= *out) 635 break; 636 /* Fall through. */ 637 default: 638 error (EXIT_TROUBLE, 0, "%s: %s", str, 639 _("invalid context length argument")); 640 } 641 } 642 643 /* Return true if the file with NAME should be skipped. 644 If COMMAND_LINE, it is a command-line argument. 645 If IS_DIR, it is a directory. */ 646 static bool 647 skipped_file (char const *name, bool command_line, bool is_dir) 648 { 649 return (is_dir 650 ? (directories == SKIP_DIRECTORIES 651 || (! (command_line && omit_dot_slash) 652 && excluded_directory_patterns 653 && excluded_file_name (excluded_directory_patterns, name))) 654 : (excluded_patterns 655 && excluded_file_name (excluded_patterns, name))); 656 } 657 658 /* Hairy buffering mechanism for grep. The intent is to keep 659 all reads aligned on a page boundary and multiples of the 660 page size, unless a read yields a partial page. */ 661 662 static char *buffer; /* Base of buffer. */ 663 static size_t bufalloc; /* Allocated buffer size, counting slop. */ 664 #define INITIAL_BUFSIZE 32768 /* Initial buffer size, not counting slop. */ 665 static int bufdesc; /* File descriptor. */ 666 static char *bufbeg; /* Beginning of user-visible stuff. */ 667 static char *buflim; /* Limit of user-visible stuff. 
*/
static size_t pagesize;		/* alignment of memory pages */
static off_t bufoffset;		/* Read offset; defined on regular files.  */
static off_t after_last_match;	/* Pointer after last matching line that
                                   would have been output if we were
                                   outputting characters. */
static bool skip_nuls;		/* Skip '\0' in data.  */
static bool skip_empty_lines;	/* Skip empty lines in data.  */
static bool seek_data_failed;	/* lseek with SEEK_DATA failed.  */
static uintmax_t totalnl;	/* Total newline count before lastnl. */

/* Return VAL aligned to the next multiple of ALIGNMENT.  VAL can be
   an integer or a pointer.  Both args must be free of side effects.  */
#define ALIGN_TO(val, alignment) \
  ((size_t) (val) % (alignment) == 0 \
   ? (val) \
   : (val) + ((alignment) - (size_t) (val) % (alignment)))

/* Add two numbers that count input bytes or lines, and report an
   error if the addition overflows.  Overflow is detected by the
   unsigned sum wrapping around to a value smaller than A.  */
static uintmax_t
add_count (uintmax_t a, uintmax_t b)
{
  uintmax_t sum = a + b;
  if (sum < a)
    error (EXIT_TROUBLE, 0, _("input is too large to count"));
  return sum;
}

/* Return true if BUF (of size SIZE) is all zeros.
   An empty buffer counts as all zeros.  */
static bool
all_zeros (char const *buf, size_t size)
{
  for (char const *p = buf; p < buf + size; p++)
    if (*p)
      return false;
  return true;
}

/* Reset the buffer for a new file, returning false if we should skip it.
   Initialize on the first time through.
   FD is the descriptor to read from and ST its file status.  */
static bool
reset (int fd, struct stat const *st)
{
  if (! pagesize)
    {
      pagesize = getpagesize ();
      /* Reject a zero page size, or one so large that the size
         arithmetic below would wrap around.  */
      if (pagesize == 0 || 2 * pagesize + 1 <= pagesize)
        abort ();
      /* Room for the data, plus a page of slop for alignment, plus a
         trailing uword that may be read past the data.  */
      bufalloc = (ALIGN_TO (INITIAL_BUFSIZE, pagesize)
                  + pagesize + sizeof (uword));
      buffer = xmalloc (bufalloc);
    }

  /* Start with an empty buffer at the first page boundary that leaves
     at least one byte in front of it for the sentinel.  */
  bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
  bufbeg[-1] = eolbyte;
  bufdesc = fd;

  if (S_ISREG (st->st_mode))
    {
      if (fd != STDIN_FILENO)
        bufoffset = 0;
      else
        {
          /* Standard input may already be positioned past the start.  */
          bufoffset = lseek (fd, 0, SEEK_CUR);
          if (bufoffset < 0)
            {
              suppressible_error (_("lseek failed"), errno);
              return false;
            }
        }
    }
  return true;
}

/* Read new stuff into the buffer, saving the specified
   amount of old stuff.  When we're done, 'bufbeg' points
   to the beginning of the buffer contents, and 'buflim'
   points just after the end.  Return false if there's an error.
   SAVE is the number of bytes just before 'buflim' to keep, and ST
   is the status of the file being read.  */
static bool
fillbuf (size_t save, struct stat const *st)
{
  size_t fillsize;
  bool cc = true;
  char *readbuf;
  size_t readsize;

  /* Offset from start of buffer to start of old stuff
     that we want to save.  */
  size_t saved_offset = buflim - save - buffer;

  if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim)
    {
      /* At least a page is still free after buflim (not counting the
         trailing uword), so read in place without moving anything.  */
      readbuf = buflim;
      bufbeg = buflim - save;
    }
  else
    {
      size_t minsize = save + pagesize;
      size_t newsize;
      size_t newalloc;
      char *newbuf;

      /* Grow newsize until it is at least as great as minsize.  */
      for (newsize = bufalloc - pagesize - sizeof (uword);
           newsize < minsize;
           newsize *= 2)
        if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize)
          xalloc_die ();

      /* Try not to allocate more memory than the file size indicates,
         as that might cause unnecessary memory exhaustion if the file
         is large.  However, do not use the original file size as a
         heuristic if we've already read past the file end, as most
         likely the file is growing.  */
      if (usable_st_size (st))
        {
          off_t to_be_read = st->st_size - bufoffset;
          off_t maxsize_off = save + to_be_read;
          if (0 <= to_be_read && to_be_read <= maxsize_off
              && maxsize_off == (size_t) maxsize_off
              && minsize <= (size_t) maxsize_off
              && (size_t) maxsize_off < newsize)
            newsize = maxsize_off;
        }

      /* Add enough room so that the buffer is aligned and has room
         for byte sentinels fore and aft, and so that a uword can
         be read aft.  */
      newalloc = newsize + pagesize + sizeof (uword);

      /* Allocate a new buffer only if the current one is too small;
         otherwise just slide the saved data within it.  */
      newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
      readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
      bufbeg = readbuf - save;
      /* Source and destination may overlap when the buffer is reused.  */
      memmove (bufbeg, buffer + saved_offset, save);
      bufbeg[-1] = eolbyte;
      if (newbuf != buffer)
        {
          free (buffer);
          buffer = newbuf;
        }
    }

  clear_asan_poison ();

  /* Read a whole number of pages, leaving the trailing uword free.  */
  readsize = buffer + bufalloc - sizeof (uword) - readbuf;
  readsize -= readsize % pagesize;

  while (true)
    {
      fillsize = safe_read (bufdesc, readbuf, readsize);
      if (fillsize == SAFE_READ_ERROR)
        {
          fillsize = 0;
          cc = false;
        }
      bufoffset += fillsize;

      /* Stop unless null bytes are being skipped and this read
         yielded nothing but null bytes.  */
      if (fillsize == 0 || !skip_nuls || !all_zeros (readbuf, fillsize))
        break;
      /* Every skipped null byte is added to the line count.  */
      totalnl = add_count (totalnl, fillsize);

      if (SEEK_DATA != SEEK_SET && !seek_data_failed)
        {
          /* Solaris SEEK_DATA fails with errno == ENXIO in a hole at EOF.  */
          off_t data_start = lseek (bufdesc, bufoffset, SEEK_DATA);
          if (data_start < 0 && errno == ENXIO
              && usable_st_size (st) && bufoffset < st->st_size)
            data_start = lseek (bufdesc, 0, SEEK_END);

          if (data_start < 0)
            seek_data_failed = true;
          else
            {
              /* Account for the bytes jumped over, as above.  */
              totalnl = add_count (totalnl, data_start - bufoffset);
              bufoffset = data_start;
            }
        }
    }

  fillsize = undossify_input (readbuf, fillsize);
  buflim = readbuf + fillsize;

  /* Initialize the following word, because skip_easy_bytes and some
     matchers read (but do not use) those bytes.  This avoids false
     positive reports of these bytes being used uninitialized.  */
  memset (buflim, 0, sizeof (uword));

  /* Mark the part of the buffer not filled by the read or set by
     the above memset call as ASAN-poisoned.  */
  asan_poison (buflim + sizeof (uword),
               bufalloc - (buflim - buffer) - sizeof (uword));

  return cc;
}

/* Flags controlling the style of output. */
static enum
{
  BINARY_BINARY_FILES,
  TEXT_BINARY_FILES,
  WITHOUT_MATCH_BINARY_FILES
} binary_files;		/* How to handle binary files.  */

static int filename_mask;	/* If zero, output nulls after filenames.  */
static bool out_quiet;		/* Suppress all normal output. */
static bool out_invert;		/* Print nonmatching stuff. */
static int out_file;		/* Print filenames. */
static bool out_line;		/* Print line numbers. */
static bool out_byte;		/* Print byte offsets. */
static intmax_t out_before;	/* Lines of leading context. */
static intmax_t out_after;	/* Lines of trailing context. */
static bool count_matches;	/* Count matching lines.  */
static int list_files;		/* List matching files.  */
static bool no_filenames;	/* Suppress file names.  */
static intmax_t max_count;	/* Stop after outputting this many
                                   lines from an input file.  */
static bool line_buffered;	/* Use line buffering.
*/
static char *label = NULL;      /* Fake filename for stdin */


/* Internal variables to keep track of byte count, context, etc. */
static uintmax_t totalcc;       /* Total character count before bufbeg. */
static char const *lastnl;      /* Pointer after last newline counted. */
static char const *lastout;     /* Pointer after last character output;
                                   NULL if no character has been output
                                   or if it's conceptually before bufbeg. */
static intmax_t outleft;        /* Maximum number of lines to be output.  */
static intmax_t pending;        /* Pending lines of output.
                                   Always kept 0 if out_quiet is true.  */
static bool done_on_match;      /* Stop scanning file on first match.  */
static bool exit_on_match;      /* Exit on first match.  */

#include "dosbuf.c"

/* Count the EOLBYTE bytes in [LASTNL, LIM), add that count to TOTALNL,
   and advance LASTNL to LIM.  */
static void
nlscan (char const *lim)
{
  size_t newlines = 0;
  char const *beg;
  for (beg = lastnl; beg < lim; beg++)
    {
      /* Jump straight to the next end-of-line byte; the loop's BEG++
         then steps past it.  */
      beg = memchr (beg, eolbyte, lim - beg);
      if (!beg)
        break;
      newlines++;
    }
  totalnl = add_count (totalnl, newlines);
  lastnl = lim;
}

/* Print the current filename.  */
static void
print_filename (void)
{
  pr_sgr_start_if (filename_color);
  fputs (filename, stdout);
  pr_sgr_end_if (filename_color);
}

/* Print a character separator.  */
static void
print_sep (char sep)
{
  pr_sgr_start_if (sep_color);
  fputc (sep, stdout);
  pr_sgr_end_if (sep_color);
}

/* Print a line number or a byte offset.  POS is the value to print in
   decimal, MIN_WIDTH the width it is padded to with leading spaces
   (only when ALIGN_TABS is set), and COLOR the color passed to the
   pr_sgr_*_if helpers around it.  */
static void
print_offset (uintmax_t pos, int min_width, const char *color)
{
  /* Do not rely on printf to print pos, since uintmax_t may be longer
     than long, and long long is not portable.  */

  char buf[sizeof pos * CHAR_BIT];
  char *p = buf + sizeof buf;

  /* Generate the digits right to left, starting at the end of BUF.  */
  do
    {
      *--p = '0' + pos % 10;
      --min_width;
    }
  while ((pos /= 10) != 0);

  /* Do this to maximize the probability of alignment across lines.  */
  if (align_tabs)
    while (--min_width >= 0)
      *--p = ' ';

  pr_sgr_start_if (color);
  fwrite (p, 1, buf + sizeof buf - p, stdout);
  pr_sgr_end_if (color);
}

/* Print a whole line head (filename, line, byte).  BEG and LIM bound
   the line being printed; SEP is the separator character written
   between the fields and after the last one.  */
static void
print_line_head (char const *beg, char const *lim, char sep)
{
  bool pending_sep = false;

  if (out_file)
    {
      print_filename ();
      if (filename_mask)
        pending_sep = true;
      else
        fputc (0, stdout);      /* A NUL byte follows the name instead.  */
    }

  if (out_line)
    {
      /* Bring the running line count up to date, unless this line has
         already been counted.  */
      if (lastnl < lim)
        {
          nlscan (beg);
          totalnl = add_count (totalnl, 1);
          lastnl = lim;
        }
      if (pending_sep)
        print_sep (sep);
      print_offset (totalnl, 4, line_num_color);
      pending_sep = true;
    }

  if (out_byte)
    {
      uintmax_t pos = add_count (totalcc, beg - bufbeg);
      pos = dossified_pos (pos);
      if (pending_sep)
        print_sep (sep);
      print_offset (pos, 6, byte_num_color);
      pending_sep = true;
    }

  if (pending_sep)
    {
      /* This assumes sep is one column wide.
         Try doing this any other way with Unicode
         (and its combining and wide characters)
         filenames and you're wasting your efforts.  */
      if (align_tabs)
        fputs ("\t\b", stdout);

      print_sep (sep);
    }
}

/* Print the matches found in the line [BEG, LIM), each wrapped in
   MATCH_COLOR.  With ONLY_MATCHING, each match goes on its own line
   after a line head, and LIM is returned.  Otherwise the text before
   each match is also printed after starting LINE_COLOR, and the return
   value points at the first byte of the line not yet printed.  */
static const char *
print_line_middle (const char *beg, const char *lim,
                   const char *line_color, const char *match_color)
{
  size_t match_size;
  size_t match_offset;
  const char *cur = beg;
  const char *mid = NULL;       /* Where unprinted text starts while
                                   only empty matches are being skipped.  */

  while (cur < lim
         && ((match_offset = execute (beg, lim - beg, &match_size, cur))
             != (size_t) -1))
    {
      char const *b = beg + match_offset;

      /* Avoid matching the empty line at the end of the buffer. */
      if (b == lim)
        break;

      /* Avoid hanging on grep --color "" foo */
      if (match_size == 0)
        {
          /* Make minimal progress; there may be further non-empty matches.  */
          /* XXX - Could really advance by one whole multi-octet character.  */
          match_size = 1;
          if (!mid)
            mid = cur;
        }
      else
        {
          /* This function is called on a matching line only,
             but is it selected or rejected/context?  */
          if (only_matching)
            print_line_head (b, lim, (out_invert ? SEP_CHAR_REJECTED
                                      : SEP_CHAR_SELECTED));
          else
            {
              pr_sgr_start (line_color);
              if (mid)
                {
                  cur = mid;
                  mid = NULL;
                }
              fwrite (cur, sizeof (char), b - cur, stdout);
            }

          pr_sgr_start_if (match_color);
          fwrite (b, sizeof (char), match_size, stdout);
          pr_sgr_end_if (match_color);
          if (only_matching)
            fputs ("\n", stdout);
        }
      cur = b + match_size;
    }

  if (only_matching)
    cur = lim;
  else if (mid)
    cur = mid;

  return cur;
}

/* Print the part of [BEG, LIM) that precedes the line terminator (a
   trailing EOLBYTE and a '\r' just before it, when present), bracketed
   by pr_sgr_start/pr_sgr_end with LINE_COLOR.  Return a pointer just
   past the bytes printed, or BEG if nothing was printed.  */
static const char *
print_line_tail (const char *beg, const char *lim, const char *line_color)
{
  size_t eol_size;
  size_t tail_size;

  eol_size   = (lim > beg && lim[-1] == eolbyte);
  eol_size  += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
  tail_size  =  lim - eol_size - beg;

  if (tail_size > 0)
    {
      pr_sgr_start (line_color);
      fwrite (beg, 1, tail_size, stdout);
      beg += tail_size;
      pr_sgr_end (line_color);
    }

  return beg;
}

/* Print the line [BEG, LIM).  SEP is SEP_CHAR_SELECTED for a selected
   line and SEP_CHAR_REJECTED for a context line; it also picks the
   colors.  Exit with EXIT_TROUBLE if stdout has a write error.  On
   return LASTOUT is LIM.  */
static void
prline (char const *beg, char const *lim, char sep)
{
  bool matching;
  const char *line_color;
  const char *match_color;

  if (!only_matching)
    print_line_head (beg, lim, sep);

  matching = (sep == SEP_CHAR_SELECTED) ^ out_invert;

  if (color_option)
    {
      line_color = (((sep == SEP_CHAR_SELECTED)
                     ^ (out_invert && (color_option < 0)))
                    ? selected_line_color  : context_line_color);
      match_color = (sep == SEP_CHAR_SELECTED
                     ? selected_match_color : context_match_color);
    }
  else
    line_color = match_color = NULL; /* Shouldn't be used.  */

  if ((only_matching && matching)
      || (color_option && (*line_color || *match_color)))
    {
      /* We already know that non-matching lines have no match (to colorize). */
      if (matching && (only_matching || *match_color))
        beg = print_line_middle (beg, lim, line_color, match_color);

      if (!only_matching && *line_color)
        {
          /* This code is exercised at least when grep is invoked like this:
             echo k| GREP_COLORS='sl=01;32' src/grep k --color=always  */
          beg = print_line_tail (beg, lim, line_color);
        }
    }

  /* Whatever the colorizing helpers left unprinted goes out verbatim.  */
  if (!only_matching && lim > beg)
    fwrite (beg, 1, lim - beg, stdout);

  if (ferror (stdout))
    {
      write_error_seen = true;
      error (EXIT_TROUBLE, 0, _("write error"));
    }

  lastout = lim;

  if (line_buffered)
    fflush (stdout);
}

/* Print pending lines of trailing context prior to LIM.  Trailing context ends
   at the next matching line when OUTLEFT is 0.  */
static void
prpending (char const *lim)
{
  if (!lastout)
    lastout = bufbeg;
  while (pending > 0 && lastout < lim)
    {
      /* NOTE(review): NL is used without a NULL check, so this relies
         on an EOLBYTE being present in [LASTOUT, LIM) -- confirm that
         every caller passes a LIM that follows a line terminator.  */
      char const *nl = memchr (lastout, eolbyte, lim - lastout);
      size_t match_size;
      --pending;
      if (outleft
          || ((execute (lastout, nl + 1 - lastout,
                        &match_size, NULL) == (size_t) -1)
              == !out_invert))
        prline (lastout, nl + 1, SEP_CHAR_REJECTED);
      else
        pending = 0;
    }
}

/* Output the lines between BEG and LIM.  Deal with context.  */
static void
prtext (char const *beg, char const *lim)
{
  static bool used;     /* Avoid printing SEP_STR_GROUP before any output.  */
  char eol = eolbyte;

  if (!out_quiet && pending > 0)
    prpending (beg);

  char const *p = beg;

  if (!out_quiet)
    {
      /* Deal with leading context.  */
      char const *bp = lastout ? lastout : bufbeg;
      intmax_t i;
      /* Back P up by at most OUT_BEFORE lines, never past BP.  */
      for (i = 0; i < out_before; ++i)
        if (p > bp)
          do
            --p;
          while (p[-1] != eol);

      /* Print the group separator unless the output is adjacent to
         the previous output in the file.  */
      if ((0 <= out_before || 0 <= out_after) && used
          && p != lastout && group_separator)
        {
          pr_sgr_start_if (sep_color);
          fputs (group_separator, stdout);
          pr_sgr_end_if (sep_color);
          fputc ('\n', stdout);
        }

      while (p < beg)
        {
          char const *nl = memchr (p, eol, beg - p);
          nl++;
          prline (p, nl, SEP_CHAR_REJECTED);
          p = nl;
        }
    }

  intmax_t n;
  if (out_invert)
    {
      /* One or more lines are output.  */
      for (n = 0; p < lim && n < outleft; n++)
        {
          char const *nl = memchr (p, eol, lim - p);
          nl++;
          if (!out_quiet)
            prline (p, nl, SEP_CHAR_SELECTED);
          p = nl;
        }
    }
  else
    {
      /* Just one line is output.  */
      if (!out_quiet)
        prline (beg, lim, SEP_CHAR_SELECTED);
      n = 1;
      p = lim;
    }

  /* Record the file offset just past the last line handled, arm the
     trailing-context counter, and charge N lines against OUTLEFT.  */
  after_last_match = bufoffset - (buflim - p);
  pending = out_quiet ? 0 : MAX (0, out_after);
  used = true;
  outleft -= n;
}

/* Replace all NUL bytes in buffer P (which ends at LIM) with EOL.
   This avoids running out of memory when binary input contains a long
   sequence of zeros, which would otherwise be considered to be part
   of a long line.  P[LIM] should be EOL.  */
static void
zap_nuls (char *p, char *lim, char eol)
{
  if (eol)
    while (true)
      {
        /* Plant a temporary NUL at LIM so that strlen stops there,
           then put EOL back.  */
        *lim = '\0';
        p += strlen (p);
        *lim = eol;
        if (p == lim)
          break;
        /* P is at a NUL inside the buffer; overwrite the whole run.  */
        do
          *p++ = eol;
        while (!*p);
      }
}

/* Scan the specified portion of the buffer, matching lines (or
   between matching lines if OUT_INVERT is true).  Return a count of
   lines printed.  
   NUL bytes are not rewritten here: BEG and LIM are
   pointers to const, and grep calls zap_nuls on newly read data
   before scanning it.  */
static intmax_t
grepbuf (char const *beg, char const *lim)
{
  intmax_t outleft0 = outleft;
  char const *p;
  char const *endp;

  for (p = beg; p < lim; p = endp)
    {
      size_t match_size;
      size_t match_offset = execute (p, lim - p, &match_size, NULL);
      if (match_offset == (size_t) -1)
        {
          if (!out_invert)
            break;
          /* No further match: when inverting, everything up to LIM is
             a run of nonmatching lines.  */
          match_offset = lim - p;
          match_size = 0;
        }
      char const *b = p + match_offset;
      endp = b + match_size;
      /* Avoid matching the empty line at the end of the buffer. */
      if (!out_invert && b == lim)
        break;
      if (!out_invert || p < b)
        {
          /* Normally print the match itself; when inverting, print
             the text that precedes it.  */
          char const *prbeg = out_invert ? p : b;
          char const *prend = out_invert ? b : endp;
          prtext (prbeg, prend);
          if (!outleft || done_on_match)
            {
              if (exit_on_match)
                exit (EXIT_SUCCESS);
              break;
            }
        }
    }

  /* prtext decrements OUTLEFT, so the difference is the line count.  */
  return outleft0 - outleft;
}

/* Search a given file.  Normally, return a count of lines printed;
   but if the file is a directory and we search it recursively, then
   return -2 if there was a match, and -1 otherwise.
   NOTE(review): no path in this function's body returns a negative
   value; the directory cases above may be stale -- confirm.  */
static intmax_t
grep (int fd, struct stat const *st)
{
  intmax_t nlines, i;
  enum textbin textbin;
  size_t residue, save;
  char oldc;
  char *beg;
  char *lim;
  char eol = eolbyte;
  char nul_zapper = '\0';
  /* Saved so that the binary-file overrides below can be undone at
     finish_grep.  */
  bool done_on_match_0 = done_on_match;
  bool out_quiet_0 = out_quiet;

  if (! reset (fd, st))
    return 0;

  totalcc = 0;
  lastout = 0;
  totalnl = 0;
  outleft = max_count;
  after_last_match = 0;
  pending = 0;
  skip_nuls = skip_empty_lines && !eol;
  seek_data_failed = false;

  nlines = 0;
  residue = 0;
  save = 0;

  if (! fillbuf (save, st))
    {
      suppressible_error (filename, errno);
      return 0;
    }

  if (binary_files == TEXT_BINARY_FILES)
    textbin = TEXTBIN_TEXT;
  else
    {
      textbin = file_textbin (bufbeg, buflim - bufbeg, fd, st);
      if (textbin_is_binary (textbin))
        {
          if (binary_files == WITHOUT_MATCH_BINARY_FILES)
            return 0;
          /* Binary input: stop at the first match, print no lines,
             and have zap_nuls turn NUL bytes into EOL.  */
          done_on_match = out_quiet = true;
          nul_zapper = eol;
          skip_nuls = skip_empty_lines;
        }
      else if (execute != Pexecute)
        textbin = TEXTBIN_TEXT;
    }

  for (;;)
    {
      input_textbin = textbin;
      lastnl = bufbeg;
      if (lastout)
        lastout = bufbeg;

      beg = bufbeg + save;

      /* no more data to scan (eof) except for maybe a residue -> break */
      if (beg == buflim)
        break;

      zap_nuls (beg, buflim, nul_zapper);

      /* Determine new residue (the length of an incomplete line at the end of
         the buffer, 0 means there is no incomplete last line).  */
      oldc = beg[-1];
      beg[-1] = eol;
      /* FIXME: use rawmemrchr if/when it exists, since we have ensured
         that this use of memrchr is guaranteed never to return NULL.  */
      lim = memrchr (beg - 1, eol, buflim - beg + 1);
      ++lim;
      beg[-1] = oldc;
      if (lim == beg)
        lim = beg - residue;
      beg -= residue;
      residue = buflim - lim;

      if (beg < lim)
        {
          if (outleft)
            nlines += grepbuf (beg, lim);
          if (pending)
            prpending (lim);
          if ((!outleft && !pending) || (nlines && done_on_match))
            goto finish_grep;
        }

      /* The last OUT_BEFORE lines at the end of the buffer will be needed as
         leading context if there is a matching line at the begin of the
         next data. Make beg point to their begin.  */
      i = 0;
      beg = lim;
      while (i < out_before && beg > bufbeg && beg != lastout)
        {
          ++i;
          do
            --beg;
          while (beg[-1] != eol);
        }

      /* Detect whether leading context is adjacent to previous output.  */
      if (lastout)
        {
          if (textbin == TEXTBIN_UNKNOWN)
            textbin = TEXTBIN_TEXT;
          if (beg != lastout)
            lastout = 0;
        }

      /* Handle some details and read more data to scan.  */
      save = residue + lim - beg;
      if (out_byte)
        totalcc = add_count (totalcc, buflim - bufbeg - save);
      if (out_line)
        nlscan (beg);
      if (! fillbuf (save, st))
        {
          suppressible_error (filename, errno);
          goto finish_grep;
        }

      /* If the file's textbin has not been determined yet, assume
         it's binary if the next input buffer suggests so.  */
      if (textbin == TEXTBIN_UNKNOWN)
        {
          enum textbin tb = buffer_textbin (bufbeg, buflim - bufbeg);
          if (textbin_is_binary (tb))
            {
              if (binary_files == WITHOUT_MATCH_BINARY_FILES)
                return 0;
              textbin = tb;
              done_on_match = out_quiet = true;
              nul_zapper = eol;
              skip_nuls = skip_empty_lines;
            }
        }
    }
  if (residue)
    {
      /* The input ended in an incomplete line: terminate it with EOL
         and scan it like any other line.  */
      *buflim++ = eol;
      if (outleft)
        nlines += grepbuf (bufbeg + save - residue, buflim);
      if (pending)
        prpending (buflim);
    }

 finish_grep:
  done_on_match = done_on_match_0;
  out_quiet = out_quiet_0;
  if (textbin_is_binary (textbin) && !out_quiet && nlines != 0)
    printf (_("Binary file %s matches\n"), filename);
  return nlines;
}

/* Handle ENT, one entry returned by fts_read on FTS.  COMMAND_LINE is
   true if the traversal root was named on the command line; it is
   narrowed here to entries at FTS_ROOTLEVEL.  Return true for entries
   that are skipped or only diagnosed; otherwise return the result of
   grepfile on the entry.  */
static bool
grepdirent (FTS *fts, FTSENT *ent, bool command_line)
{
  bool follow;
  int dirdesc;
  command_line &= ent->fts_level == FTS_ROOTLEVEL;

  if (ent->fts_info == FTS_DP)
    {
      /* Directory visited in post-order: undo the OUT_FILE bit that
         the FTS_D case below sets.  */
      if (directories == RECURSE_DIRECTORIES && command_line)
        out_file &= ~ (2 * !no_filenames);
      return true;
    }

  if (!command_line
      && skipped_file (ent->fts_name, false,
                       (ent->fts_info == FTS_D || ent->fts_info == FTS_DC
                        || ent->fts_info == FTS_DNR)))
    {
      fts_set (fts, ent, FTS_SKIP);
      return true;
    }

  filename = ent->fts_path;
  if (omit_dot_slash && filename[1])
    filename += 2;
  follow = (fts->fts_options & FTS_LOGICAL
            || (fts->fts_options & FTS_COMFOLLOW && command_line));

  switch (ent->fts_info)
    {
    case FTS_D:
      if (directories == RECURSE_DIRECTORIES)
        {
          out_file |= 2 * !no_filenames;
          return true;
        }
      fts_set (fts, ent, FTS_SKIP);
      break;

    case FTS_DC:
      if (!suppress_errors)
        error (0, 0, _("warning: %s: %s"), filename,
               _("recursive directory loop"));
      return true;

    case FTS_DNR:
    case FTS_ERR:
    case FTS_NS:
      suppressible_error (filename, ent->fts_errno);
      return true;

    case FTS_DEFAULT:
    case FTS_NSOK:
      if (skip_devices (command_line))
        {
          struct stat *st = ent->fts_statp;
          struct stat st1;
          if (! st->st_mode)
            {
              /* The file type is not already known.  Get the file status
                 before opening, since opening might have side effects
                 on a device.  */
              int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
              if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
                {
                  suppressible_error (filename, errno);
                  return true;
                }
              st = &st1;
            }
          if (is_device_mode (st->st_mode))
            return true;
        }
      break;

    case FTS_F:
    case FTS_SLNONE:
      break;

    case FTS_SL:
    case FTS_W:
      return true;

    default:
      abort ();
    }

  /* Open relative to fts's directory descriptor only when fts is
     running with FTS_CWDFD and without FTS_NOCHDIR.  */
  dirdesc = ((fts->fts_options & (FTS_NOCHDIR | FTS_CWDFD)) == FTS_CWDFD
             ? fts->fts_cwd_fd
             : AT_FDCWD);
  return grepfile (dirdesc, ent->fts_accpath, follow, command_line);
}

/* True if errno is ERR after 'open ("symlink", ... O_NOFOLLOW ...)'.
1566 POSIX specifies ELOOP, but it's EMLINK on FreeBSD and EFTYPE on NetBSD. */ 1567 static bool 1568 open_symlink_nofollow_error (int err) 1569 { 1570 if (err == ELOOP || err == EMLINK) 1571 return true; 1572 #ifdef EFTYPE 1573 if (err == EFTYPE) 1574 return true; 1575 #endif 1576 return false; 1577 } 1578 1579 static bool 1580 grepfile (int dirdesc, char const *name, bool follow, bool command_line) 1581 { 1582 int oflag = (O_RDONLY | O_NOCTTY 1583 | (follow ? 0 : O_NOFOLLOW) 1584 | (skip_devices (command_line) ? O_NONBLOCK : 0)); 1585 int desc = openat_safer (dirdesc, name, oflag); 1586 if (desc < 0) 1587 { 1588 if (follow || ! open_symlink_nofollow_error (errno)) 1589 suppressible_error (filename, errno); 1590 return true; 1591 } 1592 return grepdesc (desc, command_line); 1593 } 1594 1595 static bool 1596 grepdesc (int desc, bool command_line) 1597 { 1598 intmax_t count; 1599 bool status = true; 1600 struct stat st; 1601 1602 /* Get the file status, possibly for the second time. This catches 1603 a race condition if the directory entry changes after the 1604 directory entry is read and before the file is opened. For 1605 example, normally DESC is a directory only at the top level, but 1606 there is an exception if some other process substitutes a 1607 directory for a non-directory while 'grep' is running. */ 1608 if (fstat (desc, &st) != 0) 1609 { 1610 suppressible_error (filename, errno); 1611 goto closeout; 1612 } 1613 1614 if (desc != STDIN_FILENO && skip_devices (command_line) 1615 && is_device_mode (st.st_mode)) 1616 goto closeout; 1617 1618 if (desc != STDIN_FILENO && command_line 1619 && skipped_file (filename, true, S_ISDIR (st.st_mode) != 0)) 1620 goto closeout; 1621 1622 if (desc != STDIN_FILENO 1623 && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode)) 1624 { 1625 /* Traverse the directory starting with its full name, because 1626 unfortunately fts provides no way to traverse the directory 1627 starting from its file descriptor. 
*/ 1628 1629 FTS *fts; 1630 FTSENT *ent; 1631 int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW); 1632 char *fts_arg[2]; 1633 1634 /* Close DESC now, to conserve file descriptors if the race 1635 condition occurs many times in a deep recursion. */ 1636 if (close (desc) != 0) 1637 suppressible_error (filename, errno); 1638 1639 fts_arg[0] = (char *) filename; 1640 fts_arg[1] = NULL; 1641 fts = fts_open (fts_arg, opts, NULL); 1642 1643 if (!fts) 1644 xalloc_die (); 1645 while ((ent = fts_read (fts))) 1646 status &= grepdirent (fts, ent, command_line); 1647 if (errno) 1648 suppressible_error (filename, errno); 1649 if (fts_close (fts) != 0) 1650 suppressible_error (filename, errno); 1651 return status; 1652 } 1653 if (desc != STDIN_FILENO 1654 && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode)) 1655 || ((devices == SKIP_DEVICES 1656 || (devices == READ_COMMAND_LINE_DEVICES && !command_line)) 1657 && is_device_mode (st.st_mode)))) 1658 goto closeout; 1659 1660 /* If there is a regular file on stdout and the current file refers 1661 to the same i-node, we have to report the problem and skip it. 1662 Otherwise when matching lines from some other input reach the 1663 disk before we open this file, we can end up reading and matching 1664 those lines and appending them to the file from which we're reading. 1665 Then we'd have what appears to be an infinite loop that'd terminate 1666 only upon filling the output file system or reaching a quota. 1667 However, there is no risk of an infinite loop if grep is generating 1668 no output, i.e., with --silent, --quiet, -q. 1669 Similarly, with any of these: 1670 --max-count=N (-m) (for N >= 2) 1671 --files-with-matches (-l) 1672 --files-without-match (-L) 1673 there is no risk of trouble. 1674 For --max-count=1, grep stops after printing the first match, 1675 so there is no risk of malfunction. 
But even --max-count=2, with 1676 input==output, while there is no risk of infloop, there is a race 1677 condition that could result in "alternate" output. */ 1678 if (!out_quiet && list_files == 0 && 1 < max_count 1679 && S_ISREG (out_stat.st_mode) && out_stat.st_ino 1680 && SAME_INODE (st, out_stat)) 1681 { 1682 if (! suppress_errors) 1683 error (0, 0, _("input file %s is also the output"), quote (filename)); 1684 errseen = true; 1685 goto closeout; 1686 } 1687 1688 #if defined SET_BINARY 1689 /* Set input to binary mode. Pipes are simulated with files 1690 on DOS, so this includes the case of "foo | grep bar". */ 1691 if (!isatty (desc)) 1692 SET_BINARY (desc); 1693 #endif 1694 1695 count = grep (desc, &st); 1696 if (count < 0) 1697 status = count + 2; 1698 else 1699 { 1700 if (count_matches) 1701 { 1702 if (out_file) 1703 { 1704 print_filename (); 1705 if (filename_mask) 1706 print_sep (SEP_CHAR_SELECTED); 1707 else 1708 fputc (0, stdout); 1709 } 1710 printf ("%" PRIdMAX "\n", count); 1711 } 1712 1713 status = !count; 1714 if (list_files == 1 - 2 * status) 1715 { 1716 print_filename (); 1717 fputc ('\n' & filename_mask, stdout); 1718 } 1719 1720 if (desc == STDIN_FILENO) 1721 { 1722 off_t required_offset = outleft ? bufoffset : after_last_match; 1723 if (required_offset != bufoffset 1724 && lseek (desc, required_offset, SEEK_SET) < 0 1725 && S_ISREG (st.st_mode)) 1726 suppressible_error (filename, errno); 1727 } 1728 } 1729 1730 closeout: 1731 if (desc != STDIN_FILENO && close (desc) != 0) 1732 suppressible_error (filename, errno); 1733 return status; 1734 } 1735 1736 static bool 1737 grep_command_line_arg (char const *arg) 1738 { 1739 if (STREQ (arg, "-")) 1740 { 1741 filename = label ? 
label : _("(standard input)"); 1742 return grepdesc (STDIN_FILENO, true); 1743 } 1744 else 1745 { 1746 filename = arg; 1747 return grepfile (AT_FDCWD, arg, true, true); 1748 } 1749 } 1750 1751 _Noreturn void usage (int); 1752 void 1753 usage (int status) 1754 { 1755 if (status != 0) 1756 { 1757 fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), 1758 program_name); 1759 fprintf (stderr, _("Try '%s --help' for more information.\n"), 1760 program_name); 1761 } 1762 else 1763 { 1764 printf (_("Usage: %s [OPTION]... PATTERN [FILE]...\n"), program_name); 1765 printf (_("Search for PATTERN in each FILE or standard input.\n")); 1766 printf (_("PATTERN is, by default, a basic regular expression (BRE).\n")); 1767 printf (_("\ 1768 Example: %s -i 'hello world' menu.h main.c\n\ 1769 \n\ 1770 Regexp selection and interpretation:\n"), program_name); 1771 printf (_("\ 1772 -E, --extended-regexp PATTERN is an extended regular expression (ERE)\n\ 1773 -F, --fixed-strings PATTERN is a set of newline-separated strings\n\ 1774 -G, --basic-regexp PATTERN is a basic regular expression (BRE)\n\ 1775 -P, --perl-regexp PATTERN is a Perl regular expression\n")); 1776 /* -X is deliberately undocumented. 
*/ 1777 printf (_("\ 1778 -e, --regexp=PATTERN use PATTERN for matching\n\ 1779 -f, --file=FILE obtain PATTERN from FILE\n\ 1780 -i, --ignore-case ignore case distinctions\n\ 1781 -w, --word-regexp force PATTERN to match only whole words\n\ 1782 -x, --line-regexp force PATTERN to match only whole lines\n\ 1783 -z, --null-data a data line ends in 0 byte, not newline\n")); 1784 printf (_("\ 1785 \n\ 1786 Miscellaneous:\n\ 1787 -s, --no-messages suppress error messages\n\ 1788 -v, --invert-match select non-matching lines\n\ 1789 -V, --version display version information and exit\n\ 1790 --help display this help text and exit\n")); 1791 printf (_("\ 1792 \n\ 1793 Output control:\n\ 1794 -m, --max-count=NUM stop after NUM matches\n\ 1795 -b, --byte-offset print the byte offset with output lines\n\ 1796 -n, --line-number print line number with output lines\n\ 1797 --line-buffered flush output on every line\n\ 1798 -H, --with-filename print the file name for each match\n\ 1799 -h, --no-filename suppress the file name prefix on output\n\ 1800 --label=LABEL use LABEL as the standard input file name prefix\n\ 1801 ")); 1802 printf (_("\ 1803 -o, --only-matching show only the part of a line matching PATTERN\n\ 1804 -q, --quiet, --silent suppress all normal output\n\ 1805 --binary-files=TYPE assume that binary files are TYPE;\n\ 1806 TYPE is 'binary', 'text', or 'without-match'\n\ 1807 -a, --text equivalent to --binary-files=text\n\ 1808 ")); 1809 printf (_("\ 1810 -I equivalent to --binary-files=without-match\n\ 1811 -d, --directories=ACTION how to handle directories;\n\ 1812 ACTION is 'read', 'recurse', or 'skip'\n\ 1813 -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\ 1814 ACTION is 'read' or 'skip'\n\ 1815 -r, --recursive like --directories=recurse\n\ 1816 -R, --dereference-recursive likewise, but follow all symlinks\n\ 1817 ")); 1818 printf (_("\ 1819 --include=FILE_PATTERN search only files that match FILE_PATTERN\n\ 1820 --exclude=FILE_PATTERN skip 
files and directories matching\ 1821 FILE_PATTERN\n\ 1822 --exclude-from=FILE skip files matching any file pattern from FILE\n\ 1823 --exclude-dir=PATTERN directories that match PATTERN will be skipped.\n\ 1824 ")); 1825 printf (_("\ 1826 -L, --files-without-match print only names of FILEs containing no match\n\ 1827 -l, --files-with-matches print only names of FILEs containing matches\n\ 1828 -c, --count print only a count of matching lines per FILE\n\ 1829 -T, --initial-tab make tabs line up (if needed)\n\ 1830 -Z, --null print 0 byte after FILE name\n")); 1831 printf (_("\ 1832 \n\ 1833 Context control:\n\ 1834 -B, --before-context=NUM print NUM lines of leading context\n\ 1835 -A, --after-context=NUM print NUM lines of trailing context\n\ 1836 -C, --context=NUM print NUM lines of output context\n\ 1837 ")); 1838 printf (_("\ 1839 -NUM same as --context=NUM\n\ 1840 --color[=WHEN],\n\ 1841 --colour[=WHEN] use markers to highlight the matching strings;\n\ 1842 WHEN is 'always', 'never', or 'auto'\n\ 1843 -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\ 1844 -u, --unix-byte-offsets report offsets as if CRs were not there\n\ 1845 (MSDOS/Windows)\n\ 1846 \n")); 1847 printf (_("\ 1848 'egrep' means 'grep -E'. 'fgrep' means 'grep -F'.\n\ 1849 Direct invocation as either 'egrep' or 'fgrep' is deprecated.\n")); 1850 printf (_("\ 1851 When FILE is -, read standard input. With no FILE, read . if a command-line\n\ 1852 -r is given, - otherwise. If fewer than two FILEs are given, assume -h.\n\ 1853 Exit status is 0 if any line is selected, 1 otherwise;\n\ 1854 if any error occurs and -q is not given, the exit status is 2.\n")); 1855 emit_bug_reporting_address (); 1856 } 1857 exit (status); 1858 } 1859 1860 /* Pattern compilers and matchers. 
*/ 1861 1862 static void 1863 Gcompile (char const *pattern, size_t size) 1864 { 1865 GEAcompile (pattern, size, RE_SYNTAX_GREP); 1866 } 1867 1868 static void 1869 Ecompile (char const *pattern, size_t size) 1870 { 1871 GEAcompile (pattern, size, RE_SYNTAX_EGREP); 1872 } 1873 1874 static void 1875 Acompile (char const *pattern, size_t size) 1876 { 1877 GEAcompile (pattern, size, RE_SYNTAX_AWK); 1878 } 1879 1880 static void 1881 GAcompile (char const *pattern, size_t size) 1882 { 1883 GEAcompile (pattern, size, RE_SYNTAX_GNU_AWK); 1884 } 1885 1886 static void 1887 PAcompile (char const *pattern, size_t size) 1888 { 1889 GEAcompile (pattern, size, RE_SYNTAX_POSIX_AWK); 1890 } 1891 1892 struct matcher 1893 { 1894 char const name[16]; 1895 compile_fp_t compile; 1896 execute_fp_t execute; 1897 }; 1898 static struct matcher const matchers[] = { 1899 { "grep", Gcompile, EGexecute }, 1900 { "egrep", Ecompile, EGexecute }, 1901 { "fgrep", Fcompile, Fexecute }, 1902 { "awk", Acompile, EGexecute }, 1903 { "gawk", GAcompile, EGexecute }, 1904 { "posixawk", PAcompile, EGexecute }, 1905 { "perl", Pcompile, Pexecute }, 1906 { "", NULL, NULL }, 1907 }; 1908 1909 /* Set the matcher to M if available. Exit in case of conflicts or if 1910 M is not available. */ 1911 static void 1912 setmatcher (char const *m) 1913 { 1914 struct matcher const *p; 1915 1916 if (matcher && !STREQ (matcher, m)) 1917 error (EXIT_TROUBLE, 0, _("conflicting matchers specified")); 1918 1919 for (p = matchers; p->compile; p++) 1920 if (STREQ (m, p->name)) 1921 { 1922 matcher = p->name; 1923 compile = p->compile; 1924 execute = p->execute; 1925 return; 1926 } 1927 1928 error (EXIT_TROUBLE, 0, _("invalid matcher %s"), m); 1929 } 1930 1931 /* Find the white-space-separated options specified by OPTIONS, and 1932 using BUF to store copies of these options, set ARGV[0], ARGV[1], 1933 etc. to the option copies. Return the number N of options found. 1934 Do not set ARGV[N] to NULL. 
If ARGV is NULL, do not store ARGV[0] 1935 etc. Backslash can be used to escape whitespace (and backslashes). */ 1936 static size_t 1937 prepend_args (char const *options, char *buf, char **argv) 1938 { 1939 char const *o = options; 1940 char *b = buf; 1941 size_t n = 0; 1942 1943 for (;;) 1944 { 1945 while (c_isspace (to_uchar (*o))) 1946 o++; 1947 if (!*o) 1948 return n; 1949 if (argv) 1950 argv[n] = b; 1951 n++; 1952 1953 do 1954 if ((*b++ = *o++) == '\\' && *o) 1955 b[-1] = *o++; 1956 while (*o && ! c_isspace (to_uchar (*o))); 1957 1958 *b++ = '\0'; 1959 } 1960 } 1961 1962 /* Prepend the whitespace-separated options in OPTIONS to the argument 1963 vector of a main program with argument count *PARGC and argument 1964 vector *PARGV. Return the number of options prepended. */ 1965 static int 1966 prepend_default_options (char const *options, int *pargc, char ***pargv) 1967 { 1968 if (options && *options) 1969 { 1970 char *buf = xmalloc (strlen (options) + 1); 1971 size_t prepended = prepend_args (options, buf, NULL); 1972 int argc = *pargc; 1973 char *const *argv = *pargv; 1974 char **pp; 1975 enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *pp - 1) }; 1976 if (MAX_ARGS - argc < prepended) 1977 xalloc_die (); 1978 pp = xmalloc ((prepended + argc + 1) * sizeof *pp); 1979 *pargc = prepended + argc; 1980 *pargv = pp; 1981 *pp++ = *argv++; 1982 pp += prepend_args (options, buf, pp); 1983 while ((*pp++ = *argv++)) 1984 continue; 1985 return prepended; 1986 } 1987 1988 return 0; 1989 } 1990 1991 /* Get the next non-digit option from ARGC and ARGV. 1992 Return -1 if there are no more options. 1993 Process any digit options that were encountered on the way, 1994 and store the resulting integer into *DEFAULT_CONTEXT. 
*/ 1995 static int 1996 get_nondigit_option (int argc, char *const *argv, intmax_t *default_context) 1997 { 1998 static int prev_digit_optind = -1; 1999 int this_digit_optind; 2000 bool was_digit; 2001 char buf[INT_BUFSIZE_BOUND (intmax_t) + 4]; 2002 char *p = buf; 2003 int opt; 2004 2005 was_digit = false; 2006 this_digit_optind = optind; 2007 while (true) 2008 { 2009 opt = getopt_long (argc, (char **) argv, short_options, 2010 long_options, NULL); 2011 if ( ! ('0' <= opt && opt <= '9')) 2012 break; 2013 2014 if (prev_digit_optind != this_digit_optind || !was_digit) 2015 { 2016 /* Reset to start another context length argument. */ 2017 p = buf; 2018 } 2019 else 2020 { 2021 /* Suppress trivial leading zeros, to avoid incorrect 2022 diagnostic on strings like 00000000000. */ 2023 p -= buf[0] == '0'; 2024 } 2025 2026 if (p == buf + sizeof buf - 4) 2027 { 2028 /* Too many digits. Append "..." to make context_length_arg 2029 complain about "X...", where X contains the digits seen 2030 so far. */ 2031 strcpy (p, "..."); 2032 p += 3; 2033 break; 2034 } 2035 *p++ = opt; 2036 2037 was_digit = true; 2038 prev_digit_optind = this_digit_optind; 2039 this_digit_optind = optind; 2040 } 2041 if (p != buf) 2042 { 2043 *p = '\0'; 2044 context_length_arg (buf, default_context); 2045 } 2046 2047 return opt; 2048 } 2049 2050 /* Parse GREP_COLORS. The default would look like: 2051 GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36' 2052 with boolean capabilities (ne and rv) unset (i.e., omitted). 2053 No character escaping is needed or supported. */ 2054 static void 2055 parse_grep_colors (void) 2056 { 2057 const char *p; 2058 char *q; 2059 char *name; 2060 char *val; 2061 2062 p = getenv ("GREP_COLORS"); /* Plural! */ 2063 if (p == NULL || *p == '\0') 2064 return; 2065 2066 /* Work off a writable copy. */ 2067 q = xstrdup (p); 2068 2069 name = q; 2070 val = NULL; 2071 /* From now on, be well-formed or you're gone. 
*/ 2072 for (;;) 2073 if (*q == ':' || *q == '\0') 2074 { 2075 char c = *q; 2076 struct color_cap const *cap; 2077 2078 *q++ = '\0'; /* Terminate name or val. */ 2079 /* Empty name without val (empty cap) 2080 * won't match and will be ignored. */ 2081 for (cap = color_dict; cap->name; cap++) 2082 if (STREQ (cap->name, name)) 2083 break; 2084 /* If name unknown, go on for forward compatibility. */ 2085 if (cap->var && val) 2086 *(cap->var) = val; 2087 if (cap->fct) 2088 cap->fct (); 2089 if (c == '\0') 2090 return; 2091 name = q; 2092 val = NULL; 2093 } 2094 else if (*q == '=') 2095 { 2096 if (q == name || val) 2097 return; 2098 *q++ = '\0'; /* Terminate name. */ 2099 val = q; /* Can be the empty string. */ 2100 } 2101 else if (val == NULL) 2102 q++; /* Accumulate name. */ 2103 else if (*q == ';' || (*q >= '0' && *q <= '9')) 2104 q++; /* Accumulate val. Protect the terminal from being sent crap. */ 2105 else 2106 return; 2107 } 2108 2109 /* Return true if PAT (of length PATLEN) contains an encoding error. */ 2110 static bool 2111 contains_encoding_error (char const *pat, size_t patlen) 2112 { 2113 mbstate_t mbs = { 0 }; 2114 size_t i, charlen; 2115 2116 for (i = 0; i < patlen; i += charlen) 2117 { 2118 charlen = mb_clen (pat + i, patlen - i, &mbs); 2119 if ((size_t) -2 <= charlen) 2120 return true; 2121 } 2122 return false; 2123 } 2124 2125 /* Change a pattern for fgrep into grep. */ 2126 static void 2127 fgrep_to_grep_pattern (size_t len, char const *keys, 2128 size_t *new_len, char **new_keys) 2129 { 2130 char *p = *new_keys = xnmalloc (len + 1, 2); 2131 mbstate_t mb_state = { 0 }; 2132 size_t n; 2133 2134 for (; len; keys += n, len -= n) 2135 { 2136 n = mb_clen (keys, len, &mb_state); 2137 switch (n) 2138 { 2139 case (size_t) -2: 2140 n = len; 2141 /* Fall through. */ 2142 default: 2143 p = mempcpy (p, keys, n); 2144 break; 2145 2146 case (size_t) -1: 2147 memset (&mb_state, 0, sizeof mb_state); 2148 /* Fall through. 
*/ 2149 case 1: 2150 *p = '\\'; 2151 p += strchr ("$*.[\\^", *keys) != NULL; 2152 /* Fall through. */ 2153 case 0: 2154 *p++ = *keys; 2155 n = 1; 2156 break; 2157 } 2158 } 2159 2160 *new_len = p - *new_keys; 2161 } 2162 2163 int 2164 main (int argc, char **argv) 2165 { 2166 char *keys; 2167 size_t keycc, oldcc, keyalloc; 2168 bool with_filenames; 2169 size_t cc; 2170 int opt, prepended; 2171 int prev_optind, last_recursive; 2172 int fread_errno; 2173 intmax_t default_context; 2174 FILE *fp; 2175 exit_failure = EXIT_TROUBLE; 2176 initialize_main (&argc, &argv); 2177 set_program_name (argv[0]); 2178 program_name = argv[0]; 2179 2180 keys = NULL; 2181 keycc = 0; 2182 with_filenames = false; 2183 eolbyte = '\n'; 2184 filename_mask = ~0; 2185 2186 max_count = INTMAX_MAX; 2187 2188 /* The value -1 means to use DEFAULT_CONTEXT. */ 2189 out_after = out_before = -1; 2190 /* Default before/after context: changed by -C/-NUM options */ 2191 default_context = -1; 2192 /* Changed by -o option */ 2193 only_matching = false; 2194 2195 /* Internationalization. 
*/ 2196 #if defined HAVE_SETLOCALE 2197 setlocale (LC_ALL, ""); 2198 #endif 2199 #if defined ENABLE_NLS 2200 bindtextdomain (PACKAGE, LOCALEDIR); 2201 textdomain (PACKAGE); 2202 #endif 2203 2204 exit_failure = EXIT_TROUBLE; 2205 atexit (clean_up_stdout); 2206 2207 last_recursive = 0; 2208 2209 prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv); 2210 if (prepended) 2211 error (0, 0, _("warning: GREP_OPTIONS is deprecated;" 2212 " please use an alias or script")); 2213 2214 compile = matchers[0].compile; 2215 execute = matchers[0].execute; 2216 2217 while (prev_optind = optind, 2218 (opt = get_nondigit_option (argc, argv, &default_context)) != -1) 2219 switch (opt) 2220 { 2221 case 'A': 2222 context_length_arg (optarg, &out_after); 2223 break; 2224 2225 case 'B': 2226 context_length_arg (optarg, &out_before); 2227 break; 2228 2229 case 'C': 2230 /* Set output match context, but let any explicit leading or 2231 trailing amount specified with -A or -B stand. */ 2232 context_length_arg (optarg, &default_context); 2233 break; 2234 2235 case 'D': 2236 if (STREQ (optarg, "read")) 2237 devices = READ_DEVICES; 2238 else if (STREQ (optarg, "skip")) 2239 devices = SKIP_DEVICES; 2240 else 2241 error (EXIT_TROUBLE, 0, _("unknown devices method")); 2242 break; 2243 2244 case 'E': 2245 setmatcher ("egrep"); 2246 break; 2247 2248 case 'F': 2249 setmatcher ("fgrep"); 2250 break; 2251 2252 case 'P': 2253 setmatcher ("perl"); 2254 break; 2255 2256 case 'G': 2257 setmatcher ("grep"); 2258 break; 2259 2260 case 'X': /* undocumented on purpose */ 2261 setmatcher (optarg); 2262 break; 2263 2264 case 'H': 2265 with_filenames = true; 2266 no_filenames = false; 2267 break; 2268 2269 case 'I': 2270 binary_files = WITHOUT_MATCH_BINARY_FILES; 2271 break; 2272 2273 case 'T': 2274 align_tabs = true; 2275 break; 2276 2277 case 'U': 2278 dos_binary (); 2279 break; 2280 2281 case 'u': 2282 dos_unix_byte_offsets (); 2283 break; 2284 2285 case 'V': 2286 show_version = true; 
2287 break; 2288 2289 case 'a': 2290 binary_files = TEXT_BINARY_FILES; 2291 break; 2292 2293 case 'b': 2294 out_byte = true; 2295 break; 2296 2297 case 'c': 2298 count_matches = true; 2299 break; 2300 2301 case 'd': 2302 directories = XARGMATCH ("--directories", optarg, 2303 directories_args, directories_types); 2304 if (directories == RECURSE_DIRECTORIES) 2305 last_recursive = prev_optind; 2306 break; 2307 2308 case 'e': 2309 cc = strlen (optarg); 2310 keys = xrealloc (keys, keycc + cc + 1); 2311 strcpy (&keys[keycc], optarg); 2312 keycc += cc; 2313 keys[keycc++] = '\n'; 2314 break; 2315 2316 case 'f': 2317 fp = STREQ (optarg, "-") ? stdin : fopen (optarg, O_TEXT ? "rt" : "r"); 2318 if (!fp) 2319 error (EXIT_TROUBLE, errno, "%s", optarg); 2320 for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2) 2321 ; 2322 keys = xrealloc (keys, keyalloc); 2323 oldcc = keycc; 2324 while ((cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) != 0) 2325 { 2326 keycc += cc; 2327 if (keycc == keyalloc - 1) 2328 keys = x2nrealloc (keys, &keyalloc, sizeof *keys); 2329 } 2330 fread_errno = errno; 2331 if (ferror (fp)) 2332 error (EXIT_TROUBLE, fread_errno, "%s", optarg); 2333 if (fp != stdin) 2334 fclose (fp); 2335 /* Append final newline if file ended in non-newline. */ 2336 if (oldcc != keycc && keys[keycc - 1] != '\n') 2337 keys[keycc++] = '\n'; 2338 break; 2339 2340 case 'h': 2341 with_filenames = false; 2342 no_filenames = true; 2343 break; 2344 2345 case 'i': 2346 case 'y': /* For old-timers . . . */ 2347 match_icase = true; 2348 break; 2349 2350 case 'L': 2351 /* Like -l, except list files that don't contain matches. 2352 Inspired by the same option in Hume's gre. 
*/ 2353 list_files = -1; 2354 break; 2355 2356 case 'l': 2357 list_files = 1; 2358 break; 2359 2360 case 'm': 2361 switch (xstrtoimax (optarg, 0, 10, &max_count, "")) 2362 { 2363 case LONGINT_OK: 2364 case LONGINT_OVERFLOW: 2365 break; 2366 2367 default: 2368 error (EXIT_TROUBLE, 0, _("invalid max count")); 2369 } 2370 break; 2371 2372 case 'n': 2373 out_line = true; 2374 break; 2375 2376 case 'o': 2377 only_matching = true; 2378 break; 2379 2380 case 'q': 2381 exit_on_match = true; 2382 exit_failure = 0; 2383 break; 2384 2385 case 'R': 2386 fts_options = basic_fts_options | FTS_LOGICAL; 2387 /* Fall through. */ 2388 case 'r': 2389 directories = RECURSE_DIRECTORIES; 2390 last_recursive = prev_optind; 2391 break; 2392 2393 case 's': 2394 suppress_errors = true; 2395 break; 2396 2397 case 'v': 2398 out_invert = true; 2399 break; 2400 2401 case 'w': 2402 match_words = true; 2403 break; 2404 2405 case 'x': 2406 match_lines = true; 2407 break; 2408 2409 case 'Z': 2410 filename_mask = 0; 2411 break; 2412 2413 case 'z': 2414 eolbyte = '\0'; 2415 break; 2416 2417 case BINARY_FILES_OPTION: 2418 if (STREQ (optarg, "binary")) 2419 binary_files = BINARY_BINARY_FILES; 2420 else if (STREQ (optarg, "text")) 2421 binary_files = TEXT_BINARY_FILES; 2422 else if (STREQ (optarg, "without-match")) 2423 binary_files = WITHOUT_MATCH_BINARY_FILES; 2424 else 2425 error (EXIT_TROUBLE, 0, _("unknown binary-files type")); 2426 break; 2427 2428 case COLOR_OPTION: 2429 if (optarg) 2430 { 2431 if (!strcasecmp (optarg, "always") || !strcasecmp (optarg, "yes") 2432 || !strcasecmp (optarg, "force")) 2433 color_option = 1; 2434 else if (!strcasecmp (optarg, "never") || !strcasecmp (optarg, "no") 2435 || !strcasecmp (optarg, "none")) 2436 color_option = 0; 2437 else if (!strcasecmp (optarg, "auto") || !strcasecmp (optarg, "tty") 2438 || !strcasecmp (optarg, "if-tty")) 2439 color_option = 2; 2440 else 2441 show_help = 1; 2442 } 2443 else 2444 color_option = 2; 2445 break; 2446 2447 case 
EXCLUDE_OPTION: 2448 case INCLUDE_OPTION: 2449 if (!excluded_patterns) 2450 excluded_patterns = new_exclude (); 2451 add_exclude (excluded_patterns, optarg, 2452 (EXCLUDE_ANCHORED | EXCLUDE_WILDCARDS 2453 | (opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0))); 2454 break; 2455 case EXCLUDE_FROM_OPTION: 2456 if (!excluded_patterns) 2457 excluded_patterns = new_exclude (); 2458 if (add_exclude_file (add_exclude, excluded_patterns, optarg, 2459 EXCLUDE_ANCHORED | EXCLUDE_WILDCARDS, '\n') != 0) 2460 { 2461 error (EXIT_TROUBLE, errno, "%s", optarg); 2462 } 2463 break; 2464 2465 case EXCLUDE_DIRECTORY_OPTION: 2466 if (!excluded_directory_patterns) 2467 excluded_directory_patterns = new_exclude (); 2468 strip_trailing_slashes (optarg); 2469 add_exclude (excluded_directory_patterns, optarg, 2470 EXCLUDE_ANCHORED | EXCLUDE_WILDCARDS); 2471 break; 2472 2473 case GROUP_SEPARATOR_OPTION: 2474 group_separator = optarg; 2475 break; 2476 2477 case LINE_BUFFERED_OPTION: 2478 line_buffered = true; 2479 break; 2480 2481 case LABEL_OPTION: 2482 label = optarg; 2483 break; 2484 2485 case 0: 2486 /* long options */ 2487 break; 2488 2489 default: 2490 usage (EXIT_TROUBLE); 2491 break; 2492 2493 } 2494 2495 if (color_option == 2) 2496 color_option = isatty (STDOUT_FILENO) && should_colorize (); 2497 init_colorize (); 2498 2499 /* POSIX says that -q overrides -l, which in turn overrides the 2500 other output options. */ 2501 if (exit_on_match) 2502 list_files = 0; 2503 if (exit_on_match | list_files) 2504 { 2505 count_matches = false; 2506 done_on_match = true; 2507 } 2508 out_quiet = count_matches | done_on_match; 2509 2510 if (out_after < 0) 2511 out_after = default_context; 2512 if (out_before < 0) 2513 out_before = default_context; 2514 2515 if (color_option) 2516 { 2517 /* Legacy. */ 2518 char *userval = getenv ("GREP_COLOR"); 2519 if (userval != NULL && *userval != '\0') 2520 selected_match_color = context_match_color = userval; 2521 2522 /* New GREP_COLORS has priority. 
*/ 2523 parse_grep_colors (); 2524 } 2525 2526 if (show_version) 2527 { 2528 version_etc (stdout, program_name, PACKAGE_NAME, VERSION, AUTHORS, 2529 (char *) NULL); 2530 return EXIT_SUCCESS; 2531 } 2532 2533 if (show_help) 2534 usage (EXIT_SUCCESS); 2535 2536 struct stat tmp_stat; 2537 if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode)) 2538 out_stat = tmp_stat; 2539 2540 if (keys) 2541 { 2542 if (keycc == 0) 2543 { 2544 /* No keys were specified (e.g. -f /dev/null). Match nothing. */ 2545 out_invert ^= true; 2546 match_lines = match_words = false; 2547 } 2548 else 2549 /* Strip trailing newline. */ 2550 --keycc; 2551 } 2552 else if (optind < argc) 2553 { 2554 /* A copy must be made in case of an xrealloc() or free() later. */ 2555 keycc = strlen (argv[optind]); 2556 keys = xmemdup (argv[optind++], keycc + 1); 2557 } 2558 else 2559 usage (EXIT_TROUBLE); 2560 2561 build_mbclen_cache (); 2562 init_easy_encoding (); 2563 2564 /* In a unibyte locale, switch from fgrep to grep if 2565 the pattern matches words (where grep is typically faster). 2566 In a multibyte locale, switch from fgrep to grep if either 2567 (1) case is ignored (where grep is typically faster), or 2568 (2) the pattern has an encoding error (where fgrep might not work). */ 2569 if (compile == Fcompile 2570 && (MB_CUR_MAX <= 1 2571 ? match_words 2572 : match_icase || contains_encoding_error (keys, keycc))) 2573 { 2574 size_t new_keycc; 2575 char *new_keys; 2576 fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys); 2577 free (keys); 2578 keys = new_keys; 2579 keycc = new_keycc; 2580 matcher = "grep"; 2581 compile = Gcompile; 2582 execute = EGexecute; 2583 } 2584 2585 compile (keys, keycc); 2586 free (keys); 2587 /* We need one byte prior and one after. 
*/ 2588 char eolbytes[3] = { 0, eolbyte, 0 }; 2589 size_t match_size; 2590 skip_empty_lines = ((execute (eolbytes + 1, 1, &match_size, NULL) == 0) 2591 == out_invert); 2592 2593 if ((argc - optind > 1 && !no_filenames) || with_filenames) 2594 out_file = 1; 2595 2596 #ifdef SET_BINARY 2597 /* Output is set to binary mode because we shouldn't convert 2598 NL to CR-LF pairs, especially when grepping binary files. */ 2599 if (!isatty (STDOUT_FILENO)) 2600 SET_BINARY (STDOUT_FILENO); 2601 #endif 2602 2603 if (max_count == 0) 2604 return EXIT_FAILURE; 2605 2606 if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES) 2607 devices = READ_DEVICES; 2608 2609 char *const *files; 2610 if (optind < argc) 2611 { 2612 files = argv + optind; 2613 } 2614 else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive) 2615 { 2616 static char *const cwd_only[] = { (char *) ".", NULL }; 2617 files = cwd_only; 2618 omit_dot_slash = true; 2619 } 2620 else 2621 { 2622 static char *const stdin_only[] = { (char *) "-", NULL }; 2623 files = stdin_only; 2624 } 2625 2626 bool status = true; 2627 do 2628 status &= grep_command_line_arg (*files++); 2629 while (*files != NULL); 2630 2631 /* We register via atexit() to test stdout. */ 2632 return errseen ? EXIT_TROUBLE : status; 2633 } 2634