xref: /dflybsd-src/contrib/grep/src/grep.c (revision dc7c36e42d07cfd62c8aa566489e061f2ff94edc)
1 /* grep.c - main driver file for grep.
2    Copyright (C) 1992, 1997-2002, 2004-2015 Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17    02110-1301, USA.  */
18 
19 /* Written July 1992 by Mike Haertel.  */
20 
21 #include <config.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <wchar.h>
25 #include <wctype.h>
26 #include <fcntl.h>
27 #include <inttypes.h>
28 #include <stdio.h>
29 #include "system.h"
30 
31 #include "argmatch.h"
32 #include "c-ctype.h"
33 #include "closeout.h"
34 #include "colorize.h"
35 #include "error.h"
36 #include "exclude.h"
37 #include "exitfail.h"
38 #include "fcntl-safer.h"
39 #include "fts_.h"
40 #include "getopt.h"
41 #include "grep.h"
42 #include "intprops.h"
43 #include "progname.h"
44 #include "propername.h"
45 #include "quote.h"
46 #include "safe-read.h"
47 #include "search.h"
48 #include "version-etc.h"
49 #include "xalloc.h"
50 #include "xstrtol.h"
51 
/* Separator characters for selected and rejected lines, and the
   default string that separates groups of lines.  */
#define SEP_CHAR_SELECTED  ':'
#define SEP_CHAR_REJECTED  '-'
#define SEP_STR_GROUP      "--"

/* Author list: the original author, then a pointer to everyone else.  */
#define AUTHORS \
  proper_name ("Mike Haertel"), \
  _("others, see <http://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>")

/* Stat information for stdout, saved when stdout is connected to a
   regular file.  With it that file can be skipped automatically,
   which avoids a potential (racy) infinite loop.  */
static struct stat out_stat;

/* Nonzero means display usage information and exit.  This is an int,
   not a bool, because the "help" entry of long_options stores into
   it through an int pointer.  */
static int show_help;

/* True means print the version on standard output and exit.  */
static bool show_version;

/* True means stay quiet about nonexistent or unreadable files.  */
static bool suppress_errors;

/* Nonzero means use color markers.  */
static int color_option;

/* True means show only the part of a line that matches the
   expression.  */
static bool only_matching;

/* True means make sure the first content char in a line is on a
   tab stop.  */
static bool align_tabs;
82 
#if HAVE_ASAN
/* Start address and length of the sole poisoned region, remembered so
   that it can be unpoisoned later, just before each following read.  */
static void const *poison_buf;
static size_t poison_len;

/* Unpoison the region recorded by the last asan_poison call, if any.  */
static void
clear_asan_poison (void)
{
  if (poison_buf != NULL)
    __asan_unpoison_memory_region (poison_buf, poison_len);
}

/* Record the SIZE bytes starting at ADDR as the poisoned region, and
   poison them.  */
static void
asan_poison (void const *addr, size_t size)
{
  poison_len = size;
  poison_buf = addr;

  __asan_poison_memory_region (poison_buf, poison_len);
}
#else
/* Without ASAN support both operations are no-ops.  */
static void
clear_asan_poison (void)
{
}

static void
asan_poison (void const volatile *addr, size_t size)
{
}
#endif
108 
109 /* The group separator used when context is requested. */
110 static const char *group_separator = SEP_STR_GROUP;
111 
112 /* The context and logic for choosing default --color screen attributes
113    (foreground and background colors, etc.) are the following.
114       -- There are eight basic colors available, each with its own
115          nominal luminosity to the human eye and foreground/background
116          codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41],
117          magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46],
118          yellow [89 %, 33/43], and white [100 %, 37/47]).
119       -- Sometimes, white as a background is actually implemented using
120          a shade of light gray, so that a foreground white can be visible
121          on top of it (but most often not).
122       -- Sometimes, black as a foreground is actually implemented using
123          a shade of dark gray, so that it can be visible on top of a
124          background black (but most often not).
125       -- Sometimes, more colors are available, as extensions.
126       -- Other attributes can be selected/deselected (bold [1/22],
127          underline [4/24], standout/inverse [7/27], blink [5/25], and
128          invisible/hidden [8/28]).  They are sometimes implemented by
129          using colors instead of what their names imply; e.g., bold is
130          often achieved by using brighter colors.  In practice, only bold
131          is really available to us, underline sometimes being mapped by
132          the terminal to some strange color choice, and standout best
133          being left for use by downstream programs such as less(1).
134       -- We cannot assume that any of the extensions or special features
135          are available for the purpose of choosing defaults for everyone.
136       -- The most prevalent default terminal backgrounds are pure black
137          and pure white, and are not necessarily the same shades of
138          those as if they were selected explicitly with SGR sequences.
139          Some terminals use dark or light pictures as default background,
140          but those are covered over by an explicit selection of background
141          color with an SGR sequence; their users will appreciate their
142          background pictures not be covered like this, if possible.
143       -- Some uses of color attributes are to make some output items
144          more understated (e.g., context lines); this cannot be achieved
145          by changing the background color.
146       -- For these reasons, the grep color defaults should strive not
147          to change the background color from its default, unless it's
148          for a short item that should be highlighted, not understated.
149       -- The grep foreground color defaults (without an explicitly set
150          background) should provide enough contrast to be readable on any
151          terminal with either a black (dark) or white (light) background.
152          This only leaves red, magenta, green, and cyan (and their bold
153          counterparts) and possibly bold blue.  */
/* Color strings for matched text, in selected lines and in context
   lines respectively.  The user can override them with the deprecated
   environment variable GREP_COLOR or the newer GREP_COLORS.  */
static char const *selected_match_color = "01;31";	/* bold red */
static char const *context_match_color  = "01;31";	/* bold red */

/* Colors of the remaining output items.  An empty string stands for
   the default color pair.  */
static char const *filename_color      = "35";	/* magenta */
static char const *line_num_color      = "32";	/* green */
static char const *byte_num_color      = "32";	/* green */
static char const *sep_color           = "36";	/* cyan */
static char const *selected_line_color = "";	/* default color pair */
static char const *context_line_color  = "";	/* default color pair */
167 
168 /* Select Graphic Rendition (SGR, "\33[...m") strings.  */
169 /* Also Erase in Line (EL) to Right ("\33[K") by default.  */
170 /*    Why have EL to Right after SGR?
171          -- The behavior of line-wrapping when at the bottom of the
172             terminal screen and at the end of the current line is often
173             such that a new line is introduced, entirely cleared with
174             the current background color which may be different from the
175             default one (see the boolean back_color_erase terminfo(5)
176             capability), thus scrolling the display by one line.
177             The end of this new line will stay in this background color
178             even after reverting to the default background color with
179             "\33[m", unless it is explicitly cleared again with "\33[K"
180             (which is the behavior the user would instinctively expect
181             from the whole thing).  There may be some unavoidable
182             background-color flicker at the end of this new line because
183             of this (when timing with the monitor's redraw is just right).
184          -- The behavior of HT (tab, "\t") is usually the same as that of
185             Cursor Forward Tabulation (CHT) with a default parameter
186             of 1 ("\33[I"), i.e., it performs pure movement to the next
187             tab stop, without any clearing of either content or screen
188             attributes (including background color); try
189                printf 'asdfqwerzxcv\rASDF\tZXCV\n'
190             in a bash(1) shell to demonstrate this.  This is not what the
191             user would instinctively expect of HT (but is ok for CHT).
192             The instinctive behavior would include clearing the terminal
193             cells that are skipped over by HT with blank cells in the
194             current screen attributes, including background color;
195             the boolean dest_tabs_magic_smso terminfo(5) capability
196             indicates this saner behavior for HT, but only some rare
197             terminals have it (although it also indicates a special
198             glitch with standout mode in the Teleray terminal for which
199             it was initially introduced).  The remedy is to add "\33[K"
200             after each SGR sequence, be it START (to fix the behavior
201             of any HT after that before another SGR) or END (to fix the
202             behavior of an HT in default background color that would
203             follow a line-wrapping at the bottom of the screen in another
204             background color, and to complement doing it after START).
205             Piping grep's output through a pager such as less(1) avoids
206             any HT problems since the pager performs tab expansion.
207 
208       Generic disadvantages of this remedy are:
209          -- Some very rare terminals might support SGR but not EL (nobody
210             will use "grep --color" on a terminal that does not support
211             SGR in the first place).
212          -- Having these extra control sequences might somewhat complicate
213             the task of any program trying to parse "grep --color"
214             output in order to extract structuring information from it.
215       A specific disadvantage to doing it after SGR START is:
216          -- Even more possible background color flicker (when timing
217             with the monitor's redraw is just right), even when not at the
218             bottom of the screen.
219       There are no additional disadvantages specific to doing it after
220       SGR END.
221 
222       It would be impractical for GNU grep to become a full-fledged
223       terminal program linked against ncurses or the like, so it will
224       not detect terminfo(5) capabilities.  */
/* SGR start and end templates.  The start template receives the SGR
   parameter string through its %s.  By default each is followed by
   EL to Right ("\33[K"); color_cap_ne_fct swaps in variants without
   the EL.  */
static char const *sgr_start = "\33[%sm" "\33[K";
static char const *sgr_end   = "\33[m" "\33[K";
227 
228 /* SGR utility functions.  */
229 static void
230 pr_sgr_start (char const *s)
231 {
232   if (*s)
233     print_start_colorize (sgr_start, s);
234 }
235 static void
236 pr_sgr_end (char const *s)
237 {
238   if (*s)
239     print_end_colorize (sgr_end);
240 }
241 static void
242 pr_sgr_start_if (char const *s)
243 {
244   if (color_option)
245     pr_sgr_start (s);
246 }
247 static void
248 pr_sgr_end_if (char const *s)
249 {
250   if (color_option)
251     pr_sgr_end (s);
252 }
253 
/* One entry of the color capability table (see color_dict).  */
struct color_cap
  {
    /* Capability name; NULL in the entry that ends the table.  */
    char const *name;

    /* Address of the color variable this capability refers to, or
       NULL if there is none.  */
    char const **var;

    /* Hook function for this capability, or NULL if there is none.
       NOTE(review): presumably called by the GREP_COLORS parser after
       *VAR has been set -- confirm against the parser.  */
    void (*fct) (void);
  };
260 
261 static void
262 color_cap_mt_fct (void)
263 {
264   /* Our caller just set selected_match_color.  */
265   context_match_color = selected_match_color;
266 }
267 
268 static void
269 color_cap_rv_fct (void)
270 {
271   /* By this point, it was 1 (or already -1).  */
272   color_option = -1;  /* That's still != 0.  */
273 }
274 
275 static void
276 color_cap_ne_fct (void)
277 {
278   sgr_start = "\33[%sm";
279   sgr_end   = "\33[m";
280 }
281 
282 /* For GREP_COLORS.  */
283 static const struct color_cap color_dict[] =
284   {
285     { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
286     { "ms", &selected_match_color, NULL }, /* selected matched text */
287     { "mc", &context_match_color,  NULL }, /* context matched text */
288     { "fn", &filename_color,       NULL }, /* filename */
289     { "ln", &line_num_color,       NULL }, /* line number */
290     { "bn", &byte_num_color,       NULL }, /* byte (sic) offset */
291     { "se", &sep_color,            NULL }, /* separator */
292     { "sl", &selected_line_color,  NULL }, /* selected lines */
293     { "cx", &context_line_color,   NULL }, /* context lines */
294     { "rv", NULL,                  color_cap_rv_fct }, /* -v reverses sl/cx */
295     { "ne", NULL,                  color_cap_ne_fct }, /* no EL on SGR_* */
296     { NULL, NULL,                  NULL }
297   };
298 
/* Exclusion pattern sets: one consulted for non-directories and one
   for directories (see skipped_file).  */
static struct exclude *excluded_patterns;
static struct exclude *excluded_directory_patterns;

/* Short options.  */
static char const short_options[] =
  "0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz";

/* Codes for the non-boolean long options that have no short
   equivalent.  They are numbered from CHAR_MAX + 1 upward, so none of
   them can equal a short option character.  */
enum
{
  BINARY_FILES_OPTION = CHAR_MAX + 1,
  COLOR_OPTION,
  EXCLUDE_DIRECTORY_OPTION,
  EXCLUDE_OPTION,
  EXCLUDE_FROM_OPTION,
  GROUP_SEPARATOR_OPTION,
  INCLUDE_OPTION,
  LINE_BUFFERED_OPTION,
  LABEL_OPTION
};
318 
319 /* Long options equivalences. */
320 static struct option const long_options[] =
321 {
322   {"basic-regexp",    no_argument, NULL, 'G'},
323   {"extended-regexp", no_argument, NULL, 'E'},
324   {"fixed-regexp",    no_argument, NULL, 'F'},
325   {"fixed-strings",   no_argument, NULL, 'F'},
326   {"perl-regexp",     no_argument, NULL, 'P'},
327   {"after-context", required_argument, NULL, 'A'},
328   {"before-context", required_argument, NULL, 'B'},
329   {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
330   {"byte-offset", no_argument, NULL, 'b'},
331   {"context", required_argument, NULL, 'C'},
332   {"color", optional_argument, NULL, COLOR_OPTION},
333   {"colour", optional_argument, NULL, COLOR_OPTION},
334   {"count", no_argument, NULL, 'c'},
335   {"devices", required_argument, NULL, 'D'},
336   {"directories", required_argument, NULL, 'd'},
337   {"exclude", required_argument, NULL, EXCLUDE_OPTION},
338   {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
339   {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
340   {"file", required_argument, NULL, 'f'},
341   {"files-with-matches", no_argument, NULL, 'l'},
342   {"files-without-match", no_argument, NULL, 'L'},
343   {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
344   {"help", no_argument, &show_help, 1},
345   {"include", required_argument, NULL, INCLUDE_OPTION},
346   {"ignore-case", no_argument, NULL, 'i'},
347   {"initial-tab", no_argument, NULL, 'T'},
348   {"label", required_argument, NULL, LABEL_OPTION},
349   {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
350   {"line-number", no_argument, NULL, 'n'},
351   {"line-regexp", no_argument, NULL, 'x'},
352   {"max-count", required_argument, NULL, 'm'},
353 
354   {"no-filename", no_argument, NULL, 'h'},
355   {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
356   {"no-messages", no_argument, NULL, 's'},
357   {"null", no_argument, NULL, 'Z'},
358   {"null-data", no_argument, NULL, 'z'},
359   {"only-matching", no_argument, NULL, 'o'},
360   {"quiet", no_argument, NULL, 'q'},
361   {"recursive", no_argument, NULL, 'r'},
362   {"dereference-recursive", no_argument, NULL, 'R'},
363   {"regexp", required_argument, NULL, 'e'},
364   {"invert-match", no_argument, NULL, 'v'},
365   {"silent", no_argument, NULL, 'q'},
366   {"text", no_argument, NULL, 'a'},
367   {"binary", no_argument, NULL, 'U'},
368   {"unix-byte-offsets", no_argument, NULL, 'u'},
369   {"version", no_argument, NULL, 'V'},
370   {"with-filename", no_argument, NULL, 'H'},
371   {"word-regexp", no_argument, NULL, 'w'},
372   {0, 0, 0, 0}
373 };
374 
375 /* Define flags declared in grep.h. */
376 bool match_icase;
377 bool match_words;
378 bool match_lines;
379 char eolbyte;
380 enum textbin input_textbin;
381 
382 static char const *matcher;
383 
384 /* For error messages. */
385 /* The input file name, or (if standard input) "-" or a --label argument.  */
386 static char const *filename;
387 /* Omit leading "./" from file names in diagnostics.  */
388 static bool omit_dot_slash;
389 static bool errseen;
390 static bool write_error_seen;
391 
392 enum directories_type
393   {
394     READ_DIRECTORIES = 2,
395     RECURSE_DIRECTORIES,
396     SKIP_DIRECTORIES
397   };
398 
399 /* How to handle directories.  */
400 static char const *const directories_args[] =
401 {
402   "read", "recurse", "skip", NULL
403 };
404 static enum directories_type const directories_types[] =
405 {
406   READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES
407 };
408 ARGMATCH_VERIFY (directories_args, directories_types);
409 
410 static enum directories_type directories = READ_DIRECTORIES;
411 
412 enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK };
413 static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL;
414 
415 /* How to handle devices. */
416 static enum
417   {
418     READ_COMMAND_LINE_DEVICES,
419     READ_DEVICES,
420     SKIP_DEVICES
421   } devices = READ_COMMAND_LINE_DEVICES;
422 
423 static bool grepfile (int, char const *, bool, bool);
424 static bool grepdesc (int, bool);
425 
426 static void dos_binary (void);
427 static void dos_unix_byte_offsets (void);
428 static size_t undossify_input (char *, size_t);
429 
/* Return true if file mode M denotes a character device, a block
   device, a socket or a FIFO.  */
static bool
is_device_mode (mode_t m)
{
  if (S_ISCHR (m) || S_ISBLK (m))
    return true;
  return S_ISSOCK (m) || S_ISFIFO (m);
}
435 
436 static bool
437 skip_devices (bool command_line)
438 {
439   return (devices == SKIP_DEVICES
440           || (devices == READ_COMMAND_LINE_DEVICES && !command_line));
441 }
442 
/* Return true if ST->st_size is defined, that is, if ST describes a
   regular file, a shared memory object or a typed memory object.
   Assume the file is not a symbolic link.  */
static bool
usable_st_size (struct stat const *st)
{
  if (S_ISREG (st->st_mode))
    return true;
  return S_TYPEISSHM (st) || S_TYPEISTMO (st);
}
450 
/* Poor substitutes for SEEK_DATA and SEEK_HOLE on platforms that lack
   them.  When one of these equals SEEK_SET, do not rely on it to find
   data or holes.  */
#ifndef SEEK_DATA
enum { SEEK_DATA = SEEK_SET };
#endif
#ifndef SEEK_HOLE
enum { SEEK_HOLE = SEEK_SET };
#endif

/* Types of the compile and execute entry points of a matcher, and the
   pair of functions used for searching.  */
typedef void (*compile_fp_t) (char const *, size_t);
typedef size_t (*execute_fp_t) (char const *, size_t, size_t *, char const *);
static compile_fp_t compile;
static execute_fp_t execute;
465 
466 /* Like error, but suppress the diagnostic if requested.  */
467 static void
468 suppressible_error (char const *mesg, int errnum)
469 {
470   if (! suppress_errors)
471     error (0, errnum, "%s", mesg);
472   errseen = true;
473 }
474 
475 /* If there has already been a write error, don't bother closing
476    standard output, as that might elicit a duplicate diagnostic.  */
477 static void
478 clean_up_stdout (void)
479 {
480   if (! write_error_seen)
481     close_stdout ();
482 }
483 
484 static bool
485 textbin_is_binary (enum textbin textbin)
486 {
487   return textbin < TEXTBIN_UNKNOWN;
488 }
489 
/* Mask selecting the high-order bit of a byte.  */
enum { HIBYTE = 0x80 };

/* True if every byte that has HIBYTE off is a single-byte character.
   UTF-8 is one encoding with this property.  Set by
   init_easy_encoding.  */
static bool easy_encoding;
496 
497 static void
498 init_easy_encoding (void)
499 {
500   easy_encoding = true;
501   for (int i = 0; i < HIBYTE; i++)
502     easy_encoding &= mbclen_cache[i] == 1;
503 }
504 
/* CAST_ALIGNED (TYPE, VAL) casts VAL to TYPE.  It is meant for the
   case where TYPE is a pointer type and VAL is properly aligned for
   TYPE, but 'gcc -Wcast-align' cannot infer the alignment and would
   otherwise complain about the cast.  With a new enough GCC the
   warning is switched off around the cast; elsewhere this is a plain
   cast.  */
#if 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
# define CAST_ALIGNED(type, val)                           \
    ({ __typeof__ (val) cast_aligned_val_ = val;           \
       _Pragma ("GCC diagnostic push")                     \
       _Pragma ("GCC diagnostic ignored \"-Wcast-align\"") \
       (type) cast_aligned_val_;                           \
       _Pragma ("GCC diagnostic pop")                      \
    })
#else
# define CAST_ALIGNED(type, val) ((type) (val))
#endif

/* The unsigned type used for fast, word-at-a-time matching.  */
typedef uintmax_t uword;
522 
523 /* Skip the easy bytes in a buffer that is guaranteed to have a sentinel
524    that is not easy, and return a pointer to the first non-easy byte.
525    In easy encodings, the easy bytes all have HIBYTE off.
526    In other encodings, no byte is easy.  */
527 static char const * _GL_ATTRIBUTE_PURE
528 skip_easy_bytes (char const *buf)
529 {
530   if (!easy_encoding)
531     return buf;
532 
533   uword uword_max = -1;
534 
535   /* 0x8080..., extended to be wide enough for uword.  */
536   uword hibyte_mask = uword_max / UCHAR_MAX * HIBYTE;
537 
538   /* Search a byte at a time until the pointer is aligned, then a
539      uword at a time until a match is found, then a byte at a time to
540      identify the exact byte.  The uword search may go slightly past
541      the buffer end, but that's benign.  */
542   char const *p;
543   uword const *s;
544   for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++)
545     if (*p & HIBYTE)
546       return p;
547   for (s = CAST_ALIGNED (uword const *, p); ! (*s & hibyte_mask); s++)
548     continue;
549   for (p = (char const *) s; ! (*p & HIBYTE); p++)
550     continue;
551   return p;
552 }
553 
554 /* Return the text type of data in BUF, of size SIZE.
555    BUF must be followed by at least sizeof (uword) bytes,
556    which may be arbitrarily written to or read from.  */
557 static enum textbin
558 buffer_textbin (char *buf, size_t size)
559 {
560   if (eolbyte && memchr (buf, '\0', size))
561     return TEXTBIN_BINARY;
562 
563   if (1 < MB_CUR_MAX)
564     {
565       mbstate_t mbs = { 0 };
566       size_t clen;
567       char const *p;
568 
569       buf[size] = -1;
570       for (p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
571         {
572           clen = mbrlen (p, buf + size - p, &mbs);
573           if ((size_t) -2 <= clen)
574             return clen == (size_t) -2 ? TEXTBIN_UNKNOWN : TEXTBIN_BINARY;
575         }
576     }
577 
578   return TEXTBIN_TEXT;
579 }
580 
581 /* Return the text type of a file.  BUF, of size SIZE, is the initial
582    buffer read from the file with descriptor FD and status ST.
583    BUF must be followed by at least sizeof (uword) bytes,
584    which may be arbitrarily written to or read from.  */
585 static enum textbin
586 file_textbin (char *buf, size_t size, int fd, struct stat const *st)
587 {
588   enum textbin textbin = buffer_textbin (buf, size);
589   if (textbin_is_binary (textbin))
590     return textbin;
591 
592   if (usable_st_size (st))
593     {
594       if (st->st_size <= size)
595         return textbin == TEXTBIN_UNKNOWN ? TEXTBIN_BINARY : textbin;
596 
597       /* If the file has holes, it must contain a null byte somewhere.  */
598       if (SEEK_HOLE != SEEK_SET && eolbyte)
599         {
600           off_t cur = size;
601           if (O_BINARY || fd == STDIN_FILENO)
602             {
603               cur = lseek (fd, 0, SEEK_CUR);
604               if (cur < 0)
605                 return TEXTBIN_UNKNOWN;
606             }
607 
608           /* Look for a hole after the current location.  */
609           off_t hole_start = lseek (fd, cur, SEEK_HOLE);
610           if (0 <= hole_start)
611             {
612               if (lseek (fd, cur, SEEK_SET) < 0)
613                 suppressible_error (filename, errno);
614               if (hole_start < st->st_size)
615                 return TEXTBIN_BINARY;
616             }
617         }
618     }
619 
620   return TEXTBIN_UNKNOWN;
621 }
622 
623 /* Convert STR to a nonnegative integer, storing the result in *OUT.
624    STR must be a valid context length argument; report an error if it
625    isn't.  Silently ceiling *OUT at the maximum value, as that is
626    practically equivalent to infinity for grep's purposes.  */
627 static void
628 context_length_arg (char const *str, intmax_t *out)
629 {
630   switch (xstrtoimax (str, 0, 10, out, ""))
631     {
632     case LONGINT_OK:
633     case LONGINT_OVERFLOW:
634       if (0 <= *out)
635         break;
636       /* Fall through.  */
637     default:
638       error (EXIT_TROUBLE, 0, "%s: %s", str,
639              _("invalid context length argument"));
640     }
641 }
642 
643 /* Return true if the file with NAME should be skipped.
644    If COMMAND_LINE, it is a command-line argument.
645    If IS_DIR, it is a directory.  */
646 static bool
647 skipped_file (char const *name, bool command_line, bool is_dir)
648 {
649   return (is_dir
650           ? (directories == SKIP_DIRECTORIES
651              || (! (command_line && omit_dot_slash)
652                  && excluded_directory_patterns
653                  && excluded_file_name (excluded_directory_patterns, name)))
654           : (excluded_patterns
655              && excluded_file_name (excluded_patterns, name)));
656 }
657 
/* Hairy buffering mechanism for grep.  The intent is to keep all
   reads aligned on a page boundary and multiples of the page size,
   unless a read yields a partial page.  */

/* Base of the buffer.  */
static char *buffer;
/* Allocated buffer size, counting slop.  */
static size_t bufalloc;
/* Initial buffer size, not counting slop.  */
#define INITIAL_BUFSIZE 32768
/* File descriptor being read.  */
static int bufdesc;
/* Beginning of the user-visible stuff.  */
static char *bufbeg;
/* Limit of the user-visible stuff.  */
static char *buflim;
/* Alignment of memory pages.  */
static size_t pagesize;
/* Read offset; defined on regular files.  */
static off_t bufoffset;
/* Pointer after the last matching line that would have been output
   if we were outputting characters.  */
static off_t after_last_match;
/* Skip '\0' in data.  */
static bool skip_nuls;
/* Skip empty lines in data.  */
static bool skip_empty_lines;
/* lseek with SEEK_DATA failed.  */
static bool seek_data_failed;
/* Total newline count before lastnl.  */
static uintmax_t totalnl;

/* Return VAL rounded up to a multiple of ALIGNMENT; a VAL that is
   already a multiple is returned unchanged.  VAL can be an integer or
   a pointer.  Both arguments are evaluated more than once, so they
   must be free of side effects.  */
#define ALIGN_TO(val, alignment)                                  \
  ((size_t) (val) % (alignment) == 0                              \
   ? (val)                                                        \
   : (val) + ((alignment) - (size_t) (val) % (alignment)))
684 
685 /* Add two numbers that count input bytes or lines, and report an
686    error if the addition overflows.  */
687 static uintmax_t
688 add_count (uintmax_t a, uintmax_t b)
689 {
690   uintmax_t sum = a + b;
691   if (sum < a)
692     error (EXIT_TROUBLE, 0, _("input is too large to count"));
693   return sum;
694 }
695 
/* Return true if each of the SIZE bytes starting at BUF is zero.
   An empty range counts as all zeros.  */
static bool
all_zeros (char const *buf, size_t size)
{
  char const *end = buf + size;

  while (buf < end)
    if (*buf++)
      return false;
  return true;
}
705 
706 /* Reset the buffer for a new file, returning false if we should skip it.
707    Initialize on the first time through. */
708 static bool
709 reset (int fd, struct stat const *st)
710 {
711   if (! pagesize)
712     {
713       pagesize = getpagesize ();
714       if (pagesize == 0 || 2 * pagesize + 1 <= pagesize)
715         abort ();
716       bufalloc = (ALIGN_TO (INITIAL_BUFSIZE, pagesize)
717                   + pagesize + sizeof (uword));
718       buffer = xmalloc (bufalloc);
719     }
720 
721   bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
722   bufbeg[-1] = eolbyte;
723   bufdesc = fd;
724 
725   if (S_ISREG (st->st_mode))
726     {
727       if (fd != STDIN_FILENO)
728         bufoffset = 0;
729       else
730         {
731           bufoffset = lseek (fd, 0, SEEK_CUR);
732           if (bufoffset < 0)
733             {
734               suppressible_error (_("lseek failed"), errno);
735               return false;
736             }
737         }
738     }
739   return true;
740 }
741 
742 /* Read new stuff into the buffer, saving the specified
743    amount of old stuff.  When we're done, 'bufbeg' points
744    to the beginning of the buffer contents, and 'buflim'
745    points just after the end.  Return false if there's an error.  */
746 static bool
747 fillbuf (size_t save, struct stat const *st)
748 {
749   size_t fillsize;
750   bool cc = true;
751   char *readbuf;
752   size_t readsize;
753 
754   /* Offset from start of buffer to start of old stuff
755      that we want to save.  */
756   size_t saved_offset = buflim - save - buffer;
757 
758   if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim)
759     {
760       readbuf = buflim;
761       bufbeg = buflim - save;
762     }
763   else
764     {
765       size_t minsize = save + pagesize;
766       size_t newsize;
767       size_t newalloc;
768       char *newbuf;
769 
770       /* Grow newsize until it is at least as great as minsize.  */
771       for (newsize = bufalloc - pagesize - sizeof (uword);
772            newsize < minsize;
773            newsize *= 2)
774         if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize)
775           xalloc_die ();
776 
777       /* Try not to allocate more memory than the file size indicates,
778          as that might cause unnecessary memory exhaustion if the file
779          is large.  However, do not use the original file size as a
780          heuristic if we've already read past the file end, as most
781          likely the file is growing.  */
782       if (usable_st_size (st))
783         {
784           off_t to_be_read = st->st_size - bufoffset;
785           off_t maxsize_off = save + to_be_read;
786           if (0 <= to_be_read && to_be_read <= maxsize_off
787               && maxsize_off == (size_t) maxsize_off
788               && minsize <= (size_t) maxsize_off
789               && (size_t) maxsize_off < newsize)
790             newsize = maxsize_off;
791         }
792 
793       /* Add enough room so that the buffer is aligned and has room
794          for byte sentinels fore and aft, and so that a uword can
795          be read aft.  */
796       newalloc = newsize + pagesize + sizeof (uword);
797 
798       newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
799       readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
800       bufbeg = readbuf - save;
801       memmove (bufbeg, buffer + saved_offset, save);
802       bufbeg[-1] = eolbyte;
803       if (newbuf != buffer)
804         {
805           free (buffer);
806           buffer = newbuf;
807         }
808     }
809 
810   clear_asan_poison ();
811 
812   readsize = buffer + bufalloc - sizeof (uword) - readbuf;
813   readsize -= readsize % pagesize;
814 
815   while (true)
816     {
817       fillsize = safe_read (bufdesc, readbuf, readsize);
818       if (fillsize == SAFE_READ_ERROR)
819         {
820           fillsize = 0;
821           cc = false;
822         }
823       bufoffset += fillsize;
824 
825       if (fillsize == 0 || !skip_nuls || !all_zeros (readbuf, fillsize))
826         break;
827       totalnl = add_count (totalnl, fillsize);
828 
829       if (SEEK_DATA != SEEK_SET && !seek_data_failed)
830         {
831           /* Solaris SEEK_DATA fails with errno == ENXIO in a hole at EOF.  */
832           off_t data_start = lseek (bufdesc, bufoffset, SEEK_DATA);
833           if (data_start < 0 && errno == ENXIO
834               && usable_st_size (st) && bufoffset < st->st_size)
835             data_start = lseek (bufdesc, 0, SEEK_END);
836 
837           if (data_start < 0)
838             seek_data_failed = true;
839           else
840             {
841               totalnl = add_count (totalnl, data_start - bufoffset);
842               bufoffset = data_start;
843             }
844         }
845     }
846 
847   fillsize = undossify_input (readbuf, fillsize);
848   buflim = readbuf + fillsize;
849 
850   /* Initialize the following word, because skip_easy_bytes and some
851      matchers read (but do not use) those bytes.  This avoids false
852      positive reports of these bytes being used uninitialized.  */
853   memset (buflim, 0, sizeof (uword));
854 
855   /* Mark the part of the buffer not filled by the read or set by
856      the above memset call as ASAN-poisoned.  */
857   asan_poison (buflim + sizeof (uword),
858                bufalloc - (buflim - buffer) - sizeof (uword));
859 
860   return cc;
861 }
862 
/* Option state that controls the style of the output.  */

/* How to handle binary files.  */
static enum
{
  BINARY_BINARY_FILES,
  TEXT_BINARY_FILES,
  WITHOUT_MATCH_BINARY_FILES
} binary_files;

/* If zero, output nulls after filenames.  */
static int filename_mask;

/* Suppress all normal output.  */
static bool out_quiet;

/* Print nonmatching stuff.  */
static bool out_invert;

/* Print filenames.  Kept as an int: grepdirent ORs the value 2 into
   it and masks it out again while recursing (unless no_filenames).  */
static int out_file;

/* Print line numbers.  */
static bool out_line;

/* Print byte offsets.  */
static bool out_byte;

/* Lines of leading context.  */
static intmax_t out_before;

/* Lines of trailing context.  */
static intmax_t out_after;

/* Count matching lines.  */
static bool count_matches;

/* List matching files.  grepdesc prints the file name when this
   equals 1 and the count is nonzero, or -1 and the count is zero.  */
static int list_files;

/* Suppress file names.  */
static bool no_filenames;

/* Stop after outputting this many lines from an input file.  */
static intmax_t max_count;

/* Use line buffering.  */
static bool line_buffered;

/* Fake filename for stdin.  */
static char *label = NULL;
886 
887 
/* Internal bookkeeping: byte count, context state, and so on.  */

/* Total character count before bufbeg.  */
static uintmax_t totalcc;

/* Pointer after last newline counted.  */
static char const *lastnl;

/* Pointer after last character output; NULL if no character has
   been output or if it's conceptually before bufbeg.  */
static char const *lastout;

/* Maximum number of lines to be output.  */
static intmax_t outleft;

/* Pending lines of output.  Always kept 0 if out_quiet is true.  */
static intmax_t pending;

/* Stop scanning file on first match.  */
static bool done_on_match;

/* Exit on first match.  */
static bool exit_on_match;
899 
900 #include "dosbuf.c"
901 
902 static void
903 nlscan (char const *lim)
904 {
905   size_t newlines = 0;
906   char const *beg;
907   for (beg = lastnl; beg < lim; beg++)
908     {
909       beg = memchr (beg, eolbyte, lim - beg);
910       if (!beg)
911         break;
912       newlines++;
913     }
914   totalnl = add_count (totalnl, newlines);
915   lastnl = lim;
916 }
917 
918 /* Print the current filename.  */
919 static void
920 print_filename (void)
921 {
922   pr_sgr_start_if (filename_color);
923   fputs (filename, stdout);
924   pr_sgr_end_if (filename_color);
925 }
926 
927 /* Print a character separator.  */
928 static void
929 print_sep (char sep)
930 {
931   pr_sgr_start_if (sep_color);
932   fputc (sep, stdout);
933   pr_sgr_end_if (sep_color);
934 }
935 
936 /* Print a line number or a byte offset.  */
937 static void
938 print_offset (uintmax_t pos, int min_width, const char *color)
939 {
940   /* Do not rely on printf to print pos, since uintmax_t may be longer
941      than long, and long long is not portable.  */
942 
943   char buf[sizeof pos * CHAR_BIT];
944   char *p = buf + sizeof buf;
945 
946   do
947     {
948       *--p = '0' + pos % 10;
949       --min_width;
950     }
951   while ((pos /= 10) != 0);
952 
953   /* Do this to maximize the probability of alignment across lines.  */
954   if (align_tabs)
955     while (--min_width >= 0)
956       *--p = ' ';
957 
958   pr_sgr_start_if (color);
959   fwrite (p, 1, buf + sizeof buf - p, stdout);
960   pr_sgr_end_if (color);
961 }
962 
963 /* Print a whole line head (filename, line, byte).  */
964 static void
965 print_line_head (char const *beg, char const *lim, char sep)
966 {
967   bool pending_sep = false;
968 
969   if (out_file)
970     {
971       print_filename ();
972       if (filename_mask)
973         pending_sep = true;
974       else
975         fputc (0, stdout);
976     }
977 
978   if (out_line)
979     {
980       if (lastnl < lim)
981         {
982           nlscan (beg);
983           totalnl = add_count (totalnl, 1);
984           lastnl = lim;
985         }
986       if (pending_sep)
987         print_sep (sep);
988       print_offset (totalnl, 4, line_num_color);
989       pending_sep = true;
990     }
991 
992   if (out_byte)
993     {
994       uintmax_t pos = add_count (totalcc, beg - bufbeg);
995       pos = dossified_pos (pos);
996       if (pending_sep)
997         print_sep (sep);
998       print_offset (pos, 6, byte_num_color);
999       pending_sep = true;
1000     }
1001 
1002   if (pending_sep)
1003     {
1004       /* This assumes sep is one column wide.
1005          Try doing this any other way with Unicode
1006          (and its combining and wide characters)
1007          filenames and you're wasting your efforts.  */
1008       if (align_tabs)
1009         fputs ("\t\b", stdout);
1010 
1011       print_sep (sep);
1012     }
1013 }
1014 
1015 static const char *
1016 print_line_middle (const char *beg, const char *lim,
1017                    const char *line_color, const char *match_color)
1018 {
1019   size_t match_size;
1020   size_t match_offset;
1021   const char *cur = beg;
1022   const char *mid = NULL;
1023 
1024   while (cur < lim
1025          && ((match_offset = execute (beg, lim - beg, &match_size, cur))
1026              != (size_t) -1))
1027     {
1028       char const *b = beg + match_offset;
1029 
1030       /* Avoid matching the empty line at the end of the buffer. */
1031       if (b == lim)
1032         break;
1033 
1034       /* Avoid hanging on grep --color "" foo */
1035       if (match_size == 0)
1036         {
1037           /* Make minimal progress; there may be further non-empty matches.  */
1038           /* XXX - Could really advance by one whole multi-octet character.  */
1039           match_size = 1;
1040           if (!mid)
1041             mid = cur;
1042         }
1043       else
1044         {
1045           /* This function is called on a matching line only,
1046              but is it selected or rejected/context?  */
1047           if (only_matching)
1048             print_line_head (b, lim, (out_invert ? SEP_CHAR_REJECTED
1049                                       : SEP_CHAR_SELECTED));
1050           else
1051             {
1052               pr_sgr_start (line_color);
1053               if (mid)
1054                 {
1055                   cur = mid;
1056                   mid = NULL;
1057                 }
1058               fwrite (cur, sizeof (char), b - cur, stdout);
1059             }
1060 
1061           pr_sgr_start_if (match_color);
1062           fwrite (b, sizeof (char), match_size, stdout);
1063           pr_sgr_end_if (match_color);
1064           if (only_matching)
1065             fputs ("\n", stdout);
1066         }
1067       cur = b + match_size;
1068     }
1069 
1070   if (only_matching)
1071     cur = lim;
1072   else if (mid)
1073     cur = mid;
1074 
1075   return cur;
1076 }
1077 
1078 static const char *
1079 print_line_tail (const char *beg, const char *lim, const char *line_color)
1080 {
1081   size_t eol_size;
1082   size_t tail_size;
1083 
1084   eol_size   = (lim > beg && lim[-1] == eolbyte);
1085   eol_size  += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
1086   tail_size  =  lim - eol_size - beg;
1087 
1088   if (tail_size > 0)
1089     {
1090       pr_sgr_start (line_color);
1091       fwrite (beg, 1, tail_size, stdout);
1092       beg += tail_size;
1093       pr_sgr_end (line_color);
1094     }
1095 
1096   return beg;
1097 }
1098 
1099 static void
1100 prline (char const *beg, char const *lim, char sep)
1101 {
1102   bool matching;
1103   const char *line_color;
1104   const char *match_color;
1105 
1106   if (!only_matching)
1107     print_line_head (beg, lim, sep);
1108 
1109   matching = (sep == SEP_CHAR_SELECTED) ^ out_invert;
1110 
1111   if (color_option)
1112     {
1113       line_color = (((sep == SEP_CHAR_SELECTED)
1114                      ^ (out_invert && (color_option < 0)))
1115                     ? selected_line_color  : context_line_color);
1116       match_color = (sep == SEP_CHAR_SELECTED
1117                      ? selected_match_color : context_match_color);
1118     }
1119   else
1120     line_color = match_color = NULL; /* Shouldn't be used.  */
1121 
1122   if ((only_matching && matching)
1123       || (color_option && (*line_color || *match_color)))
1124     {
1125       /* We already know that non-matching lines have no match (to colorize). */
1126       if (matching && (only_matching || *match_color))
1127         beg = print_line_middle (beg, lim, line_color, match_color);
1128 
1129       if (!only_matching && *line_color)
1130         {
1131           /* This code is exercised at least when grep is invoked like this:
1132              echo k| GREP_COLORS='sl=01;32' src/grep k --color=always  */
1133           beg = print_line_tail (beg, lim, line_color);
1134         }
1135     }
1136 
1137   if (!only_matching && lim > beg)
1138     fwrite (beg, 1, lim - beg, stdout);
1139 
1140   if (ferror (stdout))
1141     {
1142       write_error_seen = true;
1143       error (EXIT_TROUBLE, 0, _("write error"));
1144     }
1145 
1146   lastout = lim;
1147 
1148   if (line_buffered)
1149     fflush (stdout);
1150 }
1151 
1152 /* Print pending lines of trailing context prior to LIM. Trailing context ends
1153    at the next matching line when OUTLEFT is 0.  */
1154 static void
1155 prpending (char const *lim)
1156 {
1157   if (!lastout)
1158     lastout = bufbeg;
1159   while (pending > 0 && lastout < lim)
1160     {
1161       char const *nl = memchr (lastout, eolbyte, lim - lastout);
1162       size_t match_size;
1163       --pending;
1164       if (outleft
1165           || ((execute (lastout, nl + 1 - lastout,
1166                         &match_size, NULL) == (size_t) -1)
1167               == !out_invert))
1168         prline (lastout, nl + 1, SEP_CHAR_REJECTED);
1169       else
1170         pending = 0;
1171     }
1172 }
1173 
1174 /* Output the lines between BEG and LIM.  Deal with context.  */
1175 static void
1176 prtext (char const *beg, char const *lim)
1177 {
1178   static bool used;	/* Avoid printing SEP_STR_GROUP before any output.  */
1179   char eol = eolbyte;
1180 
1181   if (!out_quiet && pending > 0)
1182     prpending (beg);
1183 
1184   char const *p = beg;
1185 
1186   if (!out_quiet)
1187     {
1188       /* Deal with leading context.  */
1189       char const *bp = lastout ? lastout : bufbeg;
1190       intmax_t i;
1191       for (i = 0; i < out_before; ++i)
1192         if (p > bp)
1193           do
1194             --p;
1195           while (p[-1] != eol);
1196 
1197       /* Print the group separator unless the output is adjacent to
1198          the previous output in the file.  */
1199       if ((0 <= out_before || 0 <= out_after) && used
1200           && p != lastout && group_separator)
1201         {
1202           pr_sgr_start_if (sep_color);
1203           fputs (group_separator, stdout);
1204           pr_sgr_end_if (sep_color);
1205           fputc ('\n', stdout);
1206         }
1207 
1208       while (p < beg)
1209         {
1210           char const *nl = memchr (p, eol, beg - p);
1211           nl++;
1212           prline (p, nl, SEP_CHAR_REJECTED);
1213           p = nl;
1214         }
1215     }
1216 
1217   intmax_t n;
1218   if (out_invert)
1219     {
1220       /* One or more lines are output.  */
1221       for (n = 0; p < lim && n < outleft; n++)
1222         {
1223           char const *nl = memchr (p, eol, lim - p);
1224           nl++;
1225           if (!out_quiet)
1226             prline (p, nl, SEP_CHAR_SELECTED);
1227           p = nl;
1228         }
1229     }
1230   else
1231     {
1232       /* Just one line is output.  */
1233       if (!out_quiet)
1234         prline (beg, lim, SEP_CHAR_SELECTED);
1235       n = 1;
1236       p = lim;
1237     }
1238 
1239   after_last_match = bufoffset - (buflim - p);
1240   pending = out_quiet ? 0 : MAX (0, out_after);
1241   used = true;
1242   outleft -= n;
1243 }
1244 
/* Replace every NUL byte in the buffer that starts at P and ends just
   before LIM with EOL; do nothing if EOL is itself NUL.  This avoids
   running out of memory when binary input contains a long sequence
   of zeros, which would otherwise be considered to be part of a long
   line.  *LIM must be writable: it is used as a temporary sentinel
   and holds EOL on return.  */
static void
zap_nuls (char *p, char *lim, char eol)
{
  if (!eol)
    return;

  for (;;)
    {
      /* Plant a terminator at LIM so that strlen cannot run past it,
         skip to the next NUL, then put EOL back.  */
      *lim = '\0';
      p += strlen (p);
      *lim = eol;

      if (p == lim)
        return;

      /* P is at a NUL inside the buffer: overwrite the whole run.
         The EOL at LIM is nonzero, so this stops there at the latest.  */
      do
        *p++ = eol;
      while (*p == '\0');
    }
}
1265 
1266 /* Scan the specified portion of the buffer, matching lines (or
1267    between matching lines if OUT_INVERT is true).  Return a count of
1268    lines printed.  Replace all NUL bytes with NUL_ZAPPER as we go.  */
1269 static intmax_t
1270 grepbuf (char const *beg, char const *lim)
1271 {
1272   intmax_t outleft0 = outleft;
1273   char const *p;
1274   char const *endp;
1275 
1276   for (p = beg; p < lim; p = endp)
1277     {
1278       size_t match_size;
1279       size_t match_offset = execute (p, lim - p, &match_size, NULL);
1280       if (match_offset == (size_t) -1)
1281         {
1282           if (!out_invert)
1283             break;
1284           match_offset = lim - p;
1285           match_size = 0;
1286         }
1287       char const *b = p + match_offset;
1288       endp = b + match_size;
1289       /* Avoid matching the empty line at the end of the buffer. */
1290       if (!out_invert && b == lim)
1291         break;
1292       if (!out_invert || p < b)
1293         {
1294           char const *prbeg = out_invert ? p : b;
1295           char const *prend = out_invert ? b : endp;
1296           prtext (prbeg, prend);
1297           if (!outleft || done_on_match)
1298             {
1299               if (exit_on_match)
1300                 exit (EXIT_SUCCESS);
1301               break;
1302             }
1303         }
1304     }
1305 
1306   return outleft0 - outleft;
1307 }
1308 
1309 /* Search a given file.  Normally, return a count of lines printed;
1310    but if the file is a directory and we search it recursively, then
1311    return -2 if there was a match, and -1 otherwise.  */
1312 static intmax_t
1313 grep (int fd, struct stat const *st)
1314 {
1315   intmax_t nlines, i;
1316   enum textbin textbin;
1317   size_t residue, save;
1318   char oldc;
1319   char *beg;
1320   char *lim;
1321   char eol = eolbyte;
1322   char nul_zapper = '\0';
1323   bool done_on_match_0 = done_on_match;
1324   bool out_quiet_0 = out_quiet;
1325 
1326   if (! reset (fd, st))
1327     return 0;
1328 
1329   totalcc = 0;
1330   lastout = 0;
1331   totalnl = 0;
1332   outleft = max_count;
1333   after_last_match = 0;
1334   pending = 0;
1335   skip_nuls = skip_empty_lines && !eol;
1336   seek_data_failed = false;
1337 
1338   nlines = 0;
1339   residue = 0;
1340   save = 0;
1341 
1342   if (! fillbuf (save, st))
1343     {
1344       suppressible_error (filename, errno);
1345       return 0;
1346     }
1347 
1348   if (binary_files == TEXT_BINARY_FILES)
1349     textbin = TEXTBIN_TEXT;
1350   else
1351     {
1352       textbin = file_textbin (bufbeg, buflim - bufbeg, fd, st);
1353       if (textbin_is_binary (textbin))
1354         {
1355           if (binary_files == WITHOUT_MATCH_BINARY_FILES)
1356             return 0;
1357           done_on_match = out_quiet = true;
1358           nul_zapper = eol;
1359           skip_nuls = skip_empty_lines;
1360         }
1361       else if (execute != Pexecute)
1362         textbin = TEXTBIN_TEXT;
1363     }
1364 
1365   for (;;)
1366     {
1367       input_textbin = textbin;
1368       lastnl = bufbeg;
1369       if (lastout)
1370         lastout = bufbeg;
1371 
1372       beg = bufbeg + save;
1373 
1374       /* no more data to scan (eof) except for maybe a residue -> break */
1375       if (beg == buflim)
1376         break;
1377 
1378       zap_nuls (beg, buflim, nul_zapper);
1379 
1380       /* Determine new residue (the length of an incomplete line at the end of
1381          the buffer, 0 means there is no incomplete last line).  */
1382       oldc = beg[-1];
1383       beg[-1] = eol;
1384       /* FIXME: use rawmemrchr if/when it exists, since we have ensured
1385          that this use of memrchr is guaranteed never to return NULL.  */
1386       lim = memrchr (beg - 1, eol, buflim - beg + 1);
1387       ++lim;
1388       beg[-1] = oldc;
1389       if (lim == beg)
1390         lim = beg - residue;
1391       beg -= residue;
1392       residue = buflim - lim;
1393 
1394       if (beg < lim)
1395         {
1396           if (outleft)
1397             nlines += grepbuf (beg, lim);
1398           if (pending)
1399             prpending (lim);
1400           if ((!outleft && !pending) || (nlines && done_on_match))
1401             goto finish_grep;
1402         }
1403 
1404       /* The last OUT_BEFORE lines at the end of the buffer will be needed as
1405          leading context if there is a matching line at the begin of the
1406          next data. Make beg point to their begin.  */
1407       i = 0;
1408       beg = lim;
1409       while (i < out_before && beg > bufbeg && beg != lastout)
1410         {
1411           ++i;
1412           do
1413             --beg;
1414           while (beg[-1] != eol);
1415         }
1416 
1417       /* Detect whether leading context is adjacent to previous output.  */
1418       if (lastout)
1419         {
1420           if (textbin == TEXTBIN_UNKNOWN)
1421             textbin = TEXTBIN_TEXT;
1422           if (beg != lastout)
1423             lastout = 0;
1424         }
1425 
1426       /* Handle some details and read more data to scan.  */
1427       save = residue + lim - beg;
1428       if (out_byte)
1429         totalcc = add_count (totalcc, buflim - bufbeg - save);
1430       if (out_line)
1431         nlscan (beg);
1432       if (! fillbuf (save, st))
1433         {
1434           suppressible_error (filename, errno);
1435           goto finish_grep;
1436         }
1437 
1438       /* If the file's textbin has not been determined yet, assume
1439          it's binary if the next input buffer suggests so.  */
1440       if (textbin == TEXTBIN_UNKNOWN)
1441         {
1442           enum textbin tb = buffer_textbin (bufbeg, buflim - bufbeg);
1443           if (textbin_is_binary (tb))
1444             {
1445               if (binary_files == WITHOUT_MATCH_BINARY_FILES)
1446                 return 0;
1447               textbin = tb;
1448               done_on_match = out_quiet = true;
1449               nul_zapper = eol;
1450               skip_nuls = skip_empty_lines;
1451             }
1452         }
1453     }
1454   if (residue)
1455     {
1456       *buflim++ = eol;
1457       if (outleft)
1458         nlines += grepbuf (bufbeg + save - residue, buflim);
1459       if (pending)
1460         prpending (buflim);
1461     }
1462 
1463  finish_grep:
1464   done_on_match = done_on_match_0;
1465   out_quiet = out_quiet_0;
1466   if (textbin_is_binary (textbin) && !out_quiet && nlines != 0)
1467     printf (_("Binary file %s matches\n"), filename);
1468   return nlines;
1469 }
1470 
1471 static bool
1472 grepdirent (FTS *fts, FTSENT *ent, bool command_line)
1473 {
1474   bool follow;
1475   int dirdesc;
1476   command_line &= ent->fts_level == FTS_ROOTLEVEL;
1477 
1478   if (ent->fts_info == FTS_DP)
1479     {
1480       if (directories == RECURSE_DIRECTORIES && command_line)
1481         out_file &= ~ (2 * !no_filenames);
1482       return true;
1483     }
1484 
1485   if (!command_line
1486       && skipped_file (ent->fts_name, false,
1487                        (ent->fts_info == FTS_D || ent->fts_info == FTS_DC
1488                         || ent->fts_info == FTS_DNR)))
1489     {
1490       fts_set (fts, ent, FTS_SKIP);
1491       return true;
1492     }
1493 
1494   filename = ent->fts_path;
1495   if (omit_dot_slash && filename[1])
1496     filename += 2;
1497   follow = (fts->fts_options & FTS_LOGICAL
1498             || (fts->fts_options & FTS_COMFOLLOW && command_line));
1499 
1500   switch (ent->fts_info)
1501     {
1502     case FTS_D:
1503       if (directories == RECURSE_DIRECTORIES)
1504         {
1505           out_file |= 2 * !no_filenames;
1506           return true;
1507         }
1508       fts_set (fts, ent, FTS_SKIP);
1509       break;
1510 
1511     case FTS_DC:
1512       if (!suppress_errors)
1513         error (0, 0, _("warning: %s: %s"), filename,
1514                _("recursive directory loop"));
1515       return true;
1516 
1517     case FTS_DNR:
1518     case FTS_ERR:
1519     case FTS_NS:
1520       suppressible_error (filename, ent->fts_errno);
1521       return true;
1522 
1523     case FTS_DEFAULT:
1524     case FTS_NSOK:
1525       if (skip_devices (command_line))
1526         {
1527           struct stat *st = ent->fts_statp;
1528           struct stat st1;
1529           if (! st->st_mode)
1530             {
1531               /* The file type is not already known.  Get the file status
1532                  before opening, since opening might have side effects
1533                  on a device.  */
1534               int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
1535               if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
1536                 {
1537                   suppressible_error (filename, errno);
1538                   return true;
1539                 }
1540               st = &st1;
1541             }
1542           if (is_device_mode (st->st_mode))
1543             return true;
1544         }
1545       break;
1546 
1547     case FTS_F:
1548     case FTS_SLNONE:
1549       break;
1550 
1551     case FTS_SL:
1552     case FTS_W:
1553       return true;
1554 
1555     default:
1556       abort ();
1557     }
1558 
1559   dirdesc = ((fts->fts_options & (FTS_NOCHDIR | FTS_CWDFD)) == FTS_CWDFD
1560              ? fts->fts_cwd_fd
1561              : AT_FDCWD);
1562   return grepfile (dirdesc, ent->fts_accpath, follow, command_line);
1563 }
1564 
/* Return true if ERR is a value that errno can have after
   'open ("symlink", ... O_NOFOLLOW ...)'.  POSIX specifies ELOOP,
   but it's EMLINK on FreeBSD and EFTYPE on NetBSD.  */
static bool
open_symlink_nofollow_error (int err)
{
  bool recognized = err == ELOOP;

  recognized = recognized || err == EMLINK;
#ifdef EFTYPE
  recognized = recognized || err == EFTYPE;
#endif

  return recognized;
}
1578 
1579 static bool
1580 grepfile (int dirdesc, char const *name, bool follow, bool command_line)
1581 {
1582   int oflag = (O_RDONLY | O_NOCTTY
1583                | (follow ? 0 : O_NOFOLLOW)
1584                | (skip_devices (command_line) ? O_NONBLOCK : 0));
1585   int desc = openat_safer (dirdesc, name, oflag);
1586   if (desc < 0)
1587     {
1588       if (follow || ! open_symlink_nofollow_error (errno))
1589         suppressible_error (filename, errno);
1590       return true;
1591     }
1592   return grepdesc (desc, command_line);
1593 }
1594 
1595 static bool
1596 grepdesc (int desc, bool command_line)
1597 {
1598   intmax_t count;
1599   bool status = true;
1600   struct stat st;
1601 
1602   /* Get the file status, possibly for the second time.  This catches
1603      a race condition if the directory entry changes after the
1604      directory entry is read and before the file is opened.  For
1605      example, normally DESC is a directory only at the top level, but
1606      there is an exception if some other process substitutes a
1607      directory for a non-directory while 'grep' is running.  */
1608   if (fstat (desc, &st) != 0)
1609     {
1610       suppressible_error (filename, errno);
1611       goto closeout;
1612     }
1613 
1614   if (desc != STDIN_FILENO && skip_devices (command_line)
1615       && is_device_mode (st.st_mode))
1616     goto closeout;
1617 
1618   if (desc != STDIN_FILENO && command_line
1619       && skipped_file (filename, true, S_ISDIR (st.st_mode) != 0))
1620     goto closeout;
1621 
1622   if (desc != STDIN_FILENO
1623       && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode))
1624     {
1625       /* Traverse the directory starting with its full name, because
1626          unfortunately fts provides no way to traverse the directory
1627          starting from its file descriptor.  */
1628 
1629       FTS *fts;
1630       FTSENT *ent;
1631       int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
1632       char *fts_arg[2];
1633 
1634       /* Close DESC now, to conserve file descriptors if the race
1635          condition occurs many times in a deep recursion.  */
1636       if (close (desc) != 0)
1637         suppressible_error (filename, errno);
1638 
1639       fts_arg[0] = (char *) filename;
1640       fts_arg[1] = NULL;
1641       fts = fts_open (fts_arg, opts, NULL);
1642 
1643       if (!fts)
1644         xalloc_die ();
1645       while ((ent = fts_read (fts)))
1646         status &= grepdirent (fts, ent, command_line);
1647       if (errno)
1648         suppressible_error (filename, errno);
1649       if (fts_close (fts) != 0)
1650         suppressible_error (filename, errno);
1651       return status;
1652     }
1653   if (desc != STDIN_FILENO
1654       && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode))
1655           || ((devices == SKIP_DEVICES
1656                || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
1657               && is_device_mode (st.st_mode))))
1658     goto closeout;
1659 
1660   /* If there is a regular file on stdout and the current file refers
1661      to the same i-node, we have to report the problem and skip it.
1662      Otherwise when matching lines from some other input reach the
1663      disk before we open this file, we can end up reading and matching
1664      those lines and appending them to the file from which we're reading.
1665      Then we'd have what appears to be an infinite loop that'd terminate
1666      only upon filling the output file system or reaching a quota.
1667      However, there is no risk of an infinite loop if grep is generating
1668      no output, i.e., with --silent, --quiet, -q.
1669      Similarly, with any of these:
1670        --max-count=N (-m) (for N >= 2)
1671        --files-with-matches (-l)
1672        --files-without-match (-L)
1673      there is no risk of trouble.
1674      For --max-count=1, grep stops after printing the first match,
1675      so there is no risk of malfunction.  But even --max-count=2, with
1676      input==output, while there is no risk of infloop, there is a race
1677      condition that could result in "alternate" output.  */
1678   if (!out_quiet && list_files == 0 && 1 < max_count
1679       && S_ISREG (out_stat.st_mode) && out_stat.st_ino
1680       && SAME_INODE (st, out_stat))
1681     {
1682       if (! suppress_errors)
1683         error (0, 0, _("input file %s is also the output"), quote (filename));
1684       errseen = true;
1685       goto closeout;
1686     }
1687 
1688 #if defined SET_BINARY
1689   /* Set input to binary mode.  Pipes are simulated with files
1690      on DOS, so this includes the case of "foo | grep bar".  */
1691   if (!isatty (desc))
1692     SET_BINARY (desc);
1693 #endif
1694 
1695   count = grep (desc, &st);
1696   if (count < 0)
1697     status = count + 2;
1698   else
1699     {
1700       if (count_matches)
1701         {
1702           if (out_file)
1703             {
1704               print_filename ();
1705               if (filename_mask)
1706                 print_sep (SEP_CHAR_SELECTED);
1707               else
1708                 fputc (0, stdout);
1709             }
1710           printf ("%" PRIdMAX "\n", count);
1711         }
1712 
1713       status = !count;
1714       if (list_files == 1 - 2 * status)
1715         {
1716           print_filename ();
1717           fputc ('\n' & filename_mask, stdout);
1718         }
1719 
1720       if (desc == STDIN_FILENO)
1721         {
1722           off_t required_offset = outleft ? bufoffset : after_last_match;
1723           if (required_offset != bufoffset
1724               && lseek (desc, required_offset, SEEK_SET) < 0
1725               && S_ISREG (st.st_mode))
1726             suppressible_error (filename, errno);
1727         }
1728     }
1729 
1730  closeout:
1731   if (desc != STDIN_FILENO && close (desc) != 0)
1732     suppressible_error (filename, errno);
1733   return status;
1734 }
1735 
1736 static bool
1737 grep_command_line_arg (char const *arg)
1738 {
1739   if (STREQ (arg, "-"))
1740     {
1741       filename = label ? label : _("(standard input)");
1742       return grepdesc (STDIN_FILENO, true);
1743     }
1744   else
1745     {
1746       filename = arg;
1747       return grepfile (AT_FDCWD, arg, true, true);
1748     }
1749 }
1750 
1751 _Noreturn void usage (int);
1752 void
1753 usage (int status)
1754 {
1755   if (status != 0)
1756     {
1757       fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"),
1758                program_name);
1759       fprintf (stderr, _("Try '%s --help' for more information.\n"),
1760                program_name);
1761     }
1762   else
1763     {
1764       printf (_("Usage: %s [OPTION]... PATTERN [FILE]...\n"), program_name);
1765       printf (_("Search for PATTERN in each FILE or standard input.\n"));
1766       printf (_("PATTERN is, by default, a basic regular expression (BRE).\n"));
1767       printf (_("\
1768 Example: %s -i 'hello world' menu.h main.c\n\
1769 \n\
1770 Regexp selection and interpretation:\n"), program_name);
1771       printf (_("\
1772   -E, --extended-regexp     PATTERN is an extended regular expression (ERE)\n\
1773   -F, --fixed-strings       PATTERN is a set of newline-separated strings\n\
1774   -G, --basic-regexp        PATTERN is a basic regular expression (BRE)\n\
1775   -P, --perl-regexp         PATTERN is a Perl regular expression\n"));
1776   /* -X is deliberately undocumented.  */
1777       printf (_("\
1778   -e, --regexp=PATTERN      use PATTERN for matching\n\
1779   -f, --file=FILE           obtain PATTERN from FILE\n\
1780   -i, --ignore-case         ignore case distinctions\n\
1781   -w, --word-regexp         force PATTERN to match only whole words\n\
1782   -x, --line-regexp         force PATTERN to match only whole lines\n\
1783   -z, --null-data           a data line ends in 0 byte, not newline\n"));
1784       printf (_("\
1785 \n\
1786 Miscellaneous:\n\
1787   -s, --no-messages         suppress error messages\n\
1788   -v, --invert-match        select non-matching lines\n\
1789   -V, --version             display version information and exit\n\
1790       --help                display this help text and exit\n"));
1791       printf (_("\
1792 \n\
1793 Output control:\n\
1794   -m, --max-count=NUM       stop after NUM matches\n\
1795   -b, --byte-offset         print the byte offset with output lines\n\
1796   -n, --line-number         print line number with output lines\n\
1797       --line-buffered       flush output on every line\n\
1798   -H, --with-filename       print the file name for each match\n\
1799   -h, --no-filename         suppress the file name prefix on output\n\
1800       --label=LABEL         use LABEL as the standard input file name prefix\n\
1801 "));
1802       printf (_("\
1803   -o, --only-matching       show only the part of a line matching PATTERN\n\
1804   -q, --quiet, --silent     suppress all normal output\n\
1805       --binary-files=TYPE   assume that binary files are TYPE;\n\
1806                             TYPE is 'binary', 'text', or 'without-match'\n\
1807   -a, --text                equivalent to --binary-files=text\n\
1808 "));
1809       printf (_("\
1810   -I                        equivalent to --binary-files=without-match\n\
1811   -d, --directories=ACTION  how to handle directories;\n\
1812                             ACTION is 'read', 'recurse', or 'skip'\n\
1813   -D, --devices=ACTION      how to handle devices, FIFOs and sockets;\n\
1814                             ACTION is 'read' or 'skip'\n\
1815   -r, --recursive           like --directories=recurse\n\
1816   -R, --dereference-recursive  likewise, but follow all symlinks\n\
1817 "));
1818       printf (_("\
1819       --include=FILE_PATTERN  search only files that match FILE_PATTERN\n\
1820       --exclude=FILE_PATTERN  skip files and directories matching\
1821  FILE_PATTERN\n\
1822       --exclude-from=FILE   skip files matching any file pattern from FILE\n\
1823       --exclude-dir=PATTERN  directories that match PATTERN will be skipped.\n\
1824 "));
1825       printf (_("\
1826   -L, --files-without-match  print only names of FILEs containing no match\n\
1827   -l, --files-with-matches  print only names of FILEs containing matches\n\
1828   -c, --count               print only a count of matching lines per FILE\n\
1829   -T, --initial-tab         make tabs line up (if needed)\n\
1830   -Z, --null                print 0 byte after FILE name\n"));
1831       printf (_("\
1832 \n\
1833 Context control:\n\
1834   -B, --before-context=NUM  print NUM lines of leading context\n\
1835   -A, --after-context=NUM   print NUM lines of trailing context\n\
1836   -C, --context=NUM         print NUM lines of output context\n\
1837 "));
1838       printf (_("\
1839   -NUM                      same as --context=NUM\n\
1840       --color[=WHEN],\n\
1841       --colour[=WHEN]       use markers to highlight the matching strings;\n\
1842                             WHEN is 'always', 'never', or 'auto'\n\
1843   -U, --binary              do not strip CR characters at EOL (MSDOS/Windows)\n\
1844   -u, --unix-byte-offsets   report offsets as if CRs were not there\n\
1845                             (MSDOS/Windows)\n\
1846 \n"));
1847       printf (_("\
1848 'egrep' means 'grep -E'.  'fgrep' means 'grep -F'.\n\
1849 Direct invocation as either 'egrep' or 'fgrep' is deprecated.\n"));
1850       printf (_("\
1851 When FILE is -, read standard input.  With no FILE, read . if a command-line\n\
1852 -r is given, - otherwise.  If fewer than two FILEs are given, assume -h.\n\
1853 Exit status is 0 if any line is selected, 1 otherwise;\n\
1854 if any error occurs and -q is not given, the exit status is 2.\n"));
1855       emit_bug_reporting_address ();
1856     }
1857   exit (status);
1858 }
1859 
1860 /* Pattern compilers and matchers.  */
1861 
/* Compile the SIZE-byte PATTERN by forwarding to GEAcompile with the
   RE_SYNTAX_GREP syntax bits.  This is the compile routine listed for
   the "grep" entry of the matchers table.  */
1862 static void
1863 Gcompile (char const *pattern, size_t size)
1864 {
1865   GEAcompile (pattern, size, RE_SYNTAX_GREP);
1866 }
1867 
/* Compile the SIZE-byte PATTERN by forwarding to GEAcompile with the
   RE_SYNTAX_EGREP syntax bits.  This is the compile routine listed for
   the "egrep" entry of the matchers table.  */
1868 static void
1869 Ecompile (char const *pattern, size_t size)
1870 {
1871   GEAcompile (pattern, size, RE_SYNTAX_EGREP);
1872 }
1873 
/* Compile the SIZE-byte PATTERN by forwarding to GEAcompile with the
   RE_SYNTAX_AWK syntax bits.  This is the compile routine listed for
   the "awk" entry of the matchers table.  */
1874 static void
1875 Acompile (char const *pattern, size_t size)
1876 {
1877   GEAcompile (pattern, size, RE_SYNTAX_AWK);
1878 }
1879 
/* Compile the SIZE-byte PATTERN by forwarding to GEAcompile with the
   RE_SYNTAX_GNU_AWK syntax bits.  This is the compile routine listed
   for the "gawk" entry of the matchers table.  */
1880 static void
1881 GAcompile (char const *pattern, size_t size)
1882 {
1883   GEAcompile (pattern, size, RE_SYNTAX_GNU_AWK);
1884 }
1885 
/* Compile the SIZE-byte PATTERN by forwarding to GEAcompile with the
   RE_SYNTAX_POSIX_AWK syntax bits.  This is the compile routine listed
   for the "posixawk" entry of the matchers table.  */
1886 static void
1887 PAcompile (char const *pattern, size_t size)
1888 {
1889   GEAcompile (pattern, size, RE_SYNTAX_POSIX_AWK);
1890 }
1891 
1892 struct matcher
1893 {
1894   char const name[16];
1895   compile_fp_t compile;
1896   execute_fp_t execute;
1897 };
1898 static struct matcher const matchers[] = {
1899   { "grep",      Gcompile, EGexecute },
1900   { "egrep",     Ecompile, EGexecute },
1901   { "fgrep",     Fcompile,  Fexecute },
1902   { "awk",       Acompile, EGexecute },
1903   { "gawk",     GAcompile, EGexecute },
1904   { "posixawk", PAcompile, EGexecute },
1905   { "perl",      Pcompile,  Pexecute },
1906   { "", NULL, NULL },
1907 };
1908 
1909 /* Set the matcher to M if available.  Exit in case of conflicts or if
1910    M is not available.  */
1911 static void
1912 setmatcher (char const *m)
1913 {
1914   struct matcher const *p;
1915 
1916   if (matcher && !STREQ (matcher, m))
1917     error (EXIT_TROUBLE, 0, _("conflicting matchers specified"));
1918 
1919   for (p = matchers; p->compile; p++)
1920     if (STREQ (m, p->name))
1921       {
1922         matcher = p->name;
1923         compile = p->compile;
1924         execute = p->execute;
1925         return;
1926       }
1927 
1928   error (EXIT_TROUBLE, 0, _("invalid matcher %s"), m);
1929 }
1930 
/* Split OPTIONS into white-space-separated words.  Each word is copied
   into BUF as a NUL-terminated string and, when ARGV is not NULL, the
   address of the Kth copy is stored in ARGV[K].  ARGV[N] is left
   untouched, where N is the number of words, which is also the return
   value.  A backslash makes the following character (white space or
   another backslash, for example) part of the word.  */
static size_t
prepend_args (char const *options, char *buf, char **argv)
{
  char const *src = options;
  char *dst = buf;
  size_t count = 0;

  for (;;)
    {
      /* Skip the separators in front of the next word.  */
      for (; c_isspace (to_uchar (*src)); src++)
        continue;
      if (*src == '\0')
        break;

      if (argv != NULL)
        argv[count] = dst;
      count++;

      /* Copy one word; an escaped character is taken literally.  */
      do
        {
          char c = *src++;
          if (c == '\\' && *src != '\0')
            c = *src++;
          *dst++ = c;
        }
      while (*src != '\0' && ! c_isspace (to_uchar (*src)));

      *dst++ = '\0';
    }

  return count;
}
1961 
1962 /* Prepend the whitespace-separated options in OPTIONS to the argument
1963    vector of a main program with argument count *PARGC and argument
1964    vector *PARGV.  Return the number of options prepended.  */
1965 static int
1966 prepend_default_options (char const *options, int *pargc, char ***pargv)
1967 {
1968   if (options && *options)
1969     {
1970       char *buf = xmalloc (strlen (options) + 1);
1971       size_t prepended = prepend_args (options, buf, NULL);
1972       int argc = *pargc;
1973       char *const *argv = *pargv;
1974       char **pp;
1975       enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *pp - 1) };
1976       if (MAX_ARGS - argc < prepended)
1977         xalloc_die ();
1978       pp = xmalloc ((prepended + argc + 1) * sizeof *pp);
1979       *pargc = prepended + argc;
1980       *pargv = pp;
1981       *pp++ = *argv++;
1982       pp += prepend_args (options, buf, pp);
1983       while ((*pp++ = *argv++))
1984         continue;
1985       return prepended;
1986     }
1987 
1988   return 0;
1989 }
1990 
1991 /* Get the next non-digit option from ARGC and ARGV.
1992    Return -1 if there are no more options.
1993    Process any digit options that were encountered on the way,
1994    and store the resulting integer into *DEFAULT_CONTEXT.  */
1995 static int
1996 get_nondigit_option (int argc, char *const *argv, intmax_t *default_context)
1997 {
1998   static int prev_digit_optind = -1;
1999   int this_digit_optind;
2000   bool was_digit;
2001   char buf[INT_BUFSIZE_BOUND (intmax_t) + 4];
2002   char *p = buf;
2003   int opt;
2004 
2005   was_digit = false;
2006   this_digit_optind = optind;
2007   while (true)
2008     {
2009       opt = getopt_long (argc, (char **) argv, short_options,
2010                          long_options, NULL);
2011       if ( ! ('0' <= opt && opt <= '9'))
2012         break;
2013 
2014       if (prev_digit_optind != this_digit_optind || !was_digit)
2015         {
2016           /* Reset to start another context length argument.  */
2017           p = buf;
2018         }
2019       else
2020         {
2021           /* Suppress trivial leading zeros, to avoid incorrect
2022              diagnostic on strings like 00000000000.  */
2023           p -= buf[0] == '0';
2024         }
2025 
2026       if (p == buf + sizeof buf - 4)
2027         {
2028           /* Too many digits.  Append "..." to make context_length_arg
2029              complain about "X...", where X contains the digits seen
2030              so far.  */
2031           strcpy (p, "...");
2032           p += 3;
2033           break;
2034         }
2035       *p++ = opt;
2036 
2037       was_digit = true;
2038       prev_digit_optind = this_digit_optind;
2039       this_digit_optind = optind;
2040     }
2041   if (p != buf)
2042     {
2043       *p = '\0';
2044       context_length_arg (buf, default_context);
2045     }
2046 
2047   return opt;
2048 }
2049 
2050 /* Parse GREP_COLORS.  The default would look like:
2051      GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36'
2052    with boolean capabilities (ne and rv) unset (i.e., omitted).
2053    No character escaping is needed or supported.  */
2054 static void
2055 parse_grep_colors (void)
2056 {
2057   const char *p;
2058   char *q;
2059   char *name;
2060   char *val;
2061 
2062   p = getenv ("GREP_COLORS"); /* Plural! */
2063   if (p == NULL || *p == '\0')
2064     return;
2065 
2066   /* Work off a writable copy.  */
2067   q = xstrdup (p);
2068 
2069   name = q;
2070   val = NULL;
2071   /* From now on, be well-formed or you're gone.  */
2072   for (;;)
2073     if (*q == ':' || *q == '\0')
2074       {
2075         char c = *q;
2076         struct color_cap const *cap;
2077 
2078         *q++ = '\0'; /* Terminate name or val.  */
2079         /* Empty name without val (empty cap)
2080          * won't match and will be ignored.  */
2081         for (cap = color_dict; cap->name; cap++)
2082           if (STREQ (cap->name, name))
2083             break;
2084         /* If name unknown, go on for forward compatibility.  */
2085         if (cap->var && val)
2086           *(cap->var) = val;
2087         if (cap->fct)
2088           cap->fct ();
2089         if (c == '\0')
2090           return;
2091         name = q;
2092         val = NULL;
2093       }
2094     else if (*q == '=')
2095       {
2096         if (q == name || val)
2097           return;
2098         *q++ = '\0'; /* Terminate name.  */
2099         val = q; /* Can be the empty string.  */
2100       }
2101     else if (val == NULL)
2102       q++; /* Accumulate name.  */
2103     else if (*q == ';' || (*q >= '0' && *q <= '9'))
2104       q++; /* Accumulate val.  Protect the terminal from being sent crap.  */
2105     else
2106       return;
2107 }
2108 
/* Return true if PAT (of length PATLEN) contains an encoding error,
   i.e. if mb_clen reports (size_t) -1 or (size_t) -2 for any character
   while scanning PAT from its start.  Return false otherwise,
   including when PATLEN is 0.  */
static bool
contains_encoding_error (char const *pat, size_t patlen)
{
  mbstate_t mbs = { 0 };
  size_t i, charlen;

  for (i = 0; i < patlen; i += charlen)
    {
      charlen = mb_clen (pat + i, patlen - i, &mbs);
      if ((size_t) -2 <= charlen)
        return true;
      /* Guarantee forward progress: if mb_clen ever reports a length
         of 0 (fgrep_to_grep_pattern has an explicit case for that
         value), still consume one byte, as that function does;
         otherwise this loop would never terminate.  */
      if (charlen == 0)
        charlen = 1;
    }
  return false;
}
2124 
/* Convert the fgrep pattern KEYS (LEN bytes) into an equivalent grep
   pattern.  The result is stored in a newly allocated buffer whose
   address is written to *NEW_KEYS and whose length is written to
   *NEW_LEN.  Single-byte characters found in "$*.[\^" are preceded by
   a backslash; multibyte characters are copied unchanged.  */
static void
fgrep_to_grep_pattern (size_t len, char const *keys,
                       size_t *new_len, char **new_keys)
{
  /* Twice LEN + 1 bytes: every input byte may gain a backslash.  */
  char *out = xnmalloc (len + 1, 2);
  mbstate_t state = { 0 };

  *new_keys = out;

  while (len != 0)
    {
      size_t clen = mb_clen (keys, len, &state);

      if (clen != (size_t) -1 && 1 < clen)
        {
          /* A multibyte character, or ((size_t) -2) an incomplete one
             that extends to the end of KEYS: copy it verbatim.  */
          if (clen == (size_t) -2)
            clen = len;
          out = mempcpy (out, keys, clen);
        }
      else
        {
          /* A single byte: an encoding error ((size_t) -1), a length
             of 1, or a length of 0.  */
          if (clen == (size_t) -1)
            memset (&state, 0, sizeof state);
          /* Only the length-0 case skips the escaping check.  */
          if (clen != 0 && strchr ("$*.[\\^", *keys) != NULL)
            *out++ = '\\';
          *out++ = *keys;
          clen = 1;
        }

      keys += clen;
      len -= clen;
    }

  *new_len = out - *new_keys;
}
2162 
/* Program entry point.  Parse the options (after prepending any taken
   from the GREP_OPTIONS environment variable), gather the pattern from
   -e/-f or else from the first non-option argument, compile it, and
   search every remaining operand.  With no operand, search "." when a
   recursive option was given on the command line itself, and "-"
   otherwise.

   Return EXIT_TROUBLE if ERRSEEN is set; otherwise return STATUS,
   which is 1 only if every grep_command_line_arg call returned true.
   Two earlier returns exist: EXIT_SUCCESS after printing the version,
   and EXIT_FAILURE when max_count is 0.  Invalid usage exits through
   usage or error instead of returning.  */
2163 int
2164 main (int argc, char **argv)
2165 {
2166   char *keys;
2167   size_t keycc, oldcc, keyalloc;
2168   bool with_filenames;
2169   size_t cc;
2170   int opt, prepended;
2171   int prev_optind, last_recursive;
2172   int fread_errno;
2173   intmax_t default_context;
2174   FILE *fp;
2175   exit_failure = EXIT_TROUBLE;
2176   initialize_main (&argc, &argv);
2177   set_program_name (argv[0]);
2178   program_name = argv[0];
2179
2180   keys = NULL;
2181   keycc = 0;
2182   with_filenames = false;
2183   eolbyte = '\n';
2184   filename_mask = ~0;
2185
2186   max_count = INTMAX_MAX;
2187
2188   /* The value -1 means to use DEFAULT_CONTEXT. */
2189   out_after = out_before = -1;
2190   /* Default before/after context: changed by -C/-NUM options */
2191   default_context = -1;
2192   /* Changed by -o option */
2193   only_matching = false;
2194
2195   /* Internationalization. */
2196 #if defined HAVE_SETLOCALE
2197   setlocale (LC_ALL, "");
2198 #endif
2199 #if defined ENABLE_NLS
2200   bindtextdomain (PACKAGE, LOCALEDIR);
2201   textdomain (PACKAGE);
2202 #endif
2203
2204   exit_failure = EXIT_TROUBLE;
2205   atexit (clean_up_stdout);
2206
2207   last_recursive = 0;
2208
  /* PREPENDED counts the arguments inserted from GREP_OPTIONS; they
     sit right after argv[0] in the replaced argument vector.  */
2209   prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
2210   if (prepended)
2211     error (0, 0, _("warning: GREP_OPTIONS is deprecated;"
2212                    " please use an alias or script"));
2213
  /* Until an option selects another matcher, use the first table entry.  */
2214   compile = matchers[0].compile;
2215   execute = matchers[0].execute;
2216
  /* OPTIND is saved before each option is fetched so that the recursive
     options can record in LAST_RECURSIVE where they appeared.  */
2217   while (prev_optind = optind,
2218          (opt = get_nondigit_option (argc, argv, &default_context)) != -1)
2219     switch (opt)
2220       {
2221       case 'A':
2222         context_length_arg (optarg, &out_after);
2223         break;
2224
2225       case 'B':
2226         context_length_arg (optarg, &out_before);
2227         break;
2228
2229       case 'C':
2230         /* Set output match context, but let any explicit leading or
2231            trailing amount specified with -A or -B stand. */
2232         context_length_arg (optarg, &default_context);
2233         break;
2234
2235       case 'D':
2236         if (STREQ (optarg, "read"))
2237           devices = READ_DEVICES;
2238         else if (STREQ (optarg, "skip"))
2239           devices = SKIP_DEVICES;
2240         else
2241           error (EXIT_TROUBLE, 0, _("unknown devices method"));
2242         break;
2243
2244       case 'E':
2245         setmatcher ("egrep");
2246         break;
2247
2248       case 'F':
2249         setmatcher ("fgrep");
2250         break;
2251
2252       case 'P':
2253         setmatcher ("perl");
2254         break;
2255
2256       case 'G':
2257         setmatcher ("grep");
2258         break;
2259
2260       case 'X': /* undocumented on purpose */
2261         setmatcher (optarg);
2262         break;
2263
2264       case 'H':
2265         with_filenames = true;
2266         no_filenames = false;
2267         break;
2268
2269       case 'I':
2270         binary_files = WITHOUT_MATCH_BINARY_FILES;
2271         break;
2272
2273       case 'T':
2274         align_tabs = true;
2275         break;
2276
2277       case 'U':
2278         dos_binary ();
2279         break;
2280
2281       case 'u':
2282         dos_unix_byte_offsets ();
2283         break;
2284
2285       case 'V':
2286         show_version = true;
2287         break;
2288
2289       case 'a':
2290         binary_files = TEXT_BINARY_FILES;
2291         break;
2292
2293       case 'b':
2294         out_byte = true;
2295         break;
2296
2297       case 'c':
2298         count_matches = true;
2299         break;
2300
2301       case 'd':
2302         directories = XARGMATCH ("--directories", optarg,
2303                                  directories_args, directories_types);
2304         if (directories == RECURSE_DIRECTORIES)
2305           last_recursive = prev_optind;
2306         break;
2307
2308       case 'e':
        /* Append this pattern to KEYS, followed by a newline that
           overwrites the NUL written by strcpy.  */
2309         cc = strlen (optarg);
2310         keys = xrealloc (keys, keycc + cc + 1);
2311         strcpy (&keys[keycc], optarg);
2312         keycc += cc;
2313         keys[keycc++] = '\n';
2314         break;
2315
2316       case 'f':
2317         fp = STREQ (optarg, "-") ? stdin : fopen (optarg, O_TEXT ? "rt" : "r");
2318         if (!fp)
2319           error (EXIT_TROUBLE, errno, "%s", optarg);
        /* Start from the smallest power of two greater than keycc + 1.
           Each read leaves one byte of the buffer unused, and the
           buffer is enlarged as soon as only that byte remains, so the
           newline appended below always fits.  */
2320         for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2)
2321           ;
2322         keys = xrealloc (keys, keyalloc);
2323         oldcc = keycc;
2324         while ((cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) != 0)
2325           {
2326             keycc += cc;
2327             if (keycc == keyalloc - 1)
2328               keys = x2nrealloc (keys, &keyalloc, sizeof *keys);
2329           }
        /* Save errno right after the reads, before ferror and error.  */
2330         fread_errno = errno;
2331         if (ferror (fp))
2332           error (EXIT_TROUBLE, fread_errno, "%s", optarg);
2333         if (fp != stdin)
2334           fclose (fp);
2335         /* Append final newline if file ended in non-newline. */
2336         if (oldcc != keycc && keys[keycc - 1] != '\n')
2337           keys[keycc++] = '\n';
2338         break;
2339
2340       case 'h':
2341         with_filenames = false;
2342         no_filenames = true;
2343         break;
2344
2345       case 'i':
2346       case 'y':			/* For old-timers . . . */
2347         match_icase = true;
2348         break;
2349
2350       case 'L':
2351         /* Like -l, except list files that don't contain matches.
2352            Inspired by the same option in Hume's gre. */
2353         list_files = -1;
2354         break;
2355
2356       case 'l':
2357         list_files = 1;
2358         break;
2359
2360       case 'm':
        /* An overflowing count is accepted along with a valid one.  */
2361         switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
2362           {
2363           case LONGINT_OK:
2364           case LONGINT_OVERFLOW:
2365             break;
2366
2367           default:
2368             error (EXIT_TROUBLE, 0, _("invalid max count"));
2369           }
2370         break;
2371
2372       case 'n':
2373         out_line = true;
2374         break;
2375
2376       case 'o':
2377         only_matching = true;
2378         break;
2379
2380       case 'q':
2381         exit_on_match = true;
2382         exit_failure = 0;
2383         break;
2384
2385       case 'R':
2386         fts_options = basic_fts_options | FTS_LOGICAL;
2387         /* Fall through.  */
2388       case 'r':
2389         directories = RECURSE_DIRECTORIES;
2390         last_recursive = prev_optind;
2391         break;
2392
2393       case 's':
2394         suppress_errors = true;
2395         break;
2396
2397       case 'v':
2398         out_invert = true;
2399         break;
2400
2401       case 'w':
2402         match_words = true;
2403         break;
2404
2405       case 'x':
2406         match_lines = true;
2407         break;
2408
2409       case 'Z':
2410         filename_mask = 0;
2411         break;
2412
2413       case 'z':
2414         eolbyte = '\0';
2415         break;
2416
2417       case BINARY_FILES_OPTION:
2418         if (STREQ (optarg, "binary"))
2419           binary_files = BINARY_BINARY_FILES;
2420         else if (STREQ (optarg, "text"))
2421           binary_files = TEXT_BINARY_FILES;
2422         else if (STREQ (optarg, "without-match"))
2423           binary_files = WITHOUT_MATCH_BINARY_FILES;
2424         else
2425           error (EXIT_TROUBLE, 0, _("unknown binary-files type"));
2426         break;
2427
2428       case COLOR_OPTION:
        /* color_option: 1 is always, 0 is never, 2 is decided after
           option parsing.  An unrecognized argument requests the help
           text instead.  */
2429         if (optarg)
2430           {
2431             if (!strcasecmp (optarg, "always") || !strcasecmp (optarg, "yes")
2432                 || !strcasecmp (optarg, "force"))
2433               color_option = 1;
2434             else if (!strcasecmp (optarg, "never") || !strcasecmp (optarg, "no")
2435                      || !strcasecmp (optarg, "none"))
2436               color_option = 0;
2437             else if (!strcasecmp (optarg, "auto") || !strcasecmp (optarg, "tty")
2438                      || !strcasecmp (optarg, "if-tty"))
2439               color_option = 2;
2440             else
2441               show_help = 1;
2442           }
2443         else
2444           color_option = 2;
2445         break;
2446
2447       case EXCLUDE_OPTION:
2448       case INCLUDE_OPTION:
        /* --include shares the exclusion list, flagged EXCLUDE_INCLUDE.  */
2449         if (!excluded_patterns)
2450           excluded_patterns = new_exclude ();
2451         add_exclude (excluded_patterns, optarg,
2452                      (EXCLUDE_ANCHORED | EXCLUDE_WILDCARDS
2453                       | (opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0)));
2454         break;
2455       case EXCLUDE_FROM_OPTION:
2456         if (!excluded_patterns)
2457           excluded_patterns = new_exclude ();
2458         if (add_exclude_file (add_exclude, excluded_patterns, optarg,
2459                               EXCLUDE_ANCHORED | EXCLUDE_WILDCARDS, '\n') != 0)
2460           {
2461             error (EXIT_TROUBLE, errno, "%s", optarg);
2462           }
2463         break;
2464
2465       case EXCLUDE_DIRECTORY_OPTION:
2466         if (!excluded_directory_patterns)
2467           excluded_directory_patterns = new_exclude ();
2468         strip_trailing_slashes (optarg);
2469         add_exclude (excluded_directory_patterns, optarg,
2470                      EXCLUDE_ANCHORED | EXCLUDE_WILDCARDS);
2471         break;
2472
2473       case GROUP_SEPARATOR_OPTION:
2474         group_separator = optarg;
2475         break;
2476
2477       case LINE_BUFFERED_OPTION:
2478         line_buffered = true;
2479         break;
2480
2481       case LABEL_OPTION:
2482         label = optarg;
2483         break;
2484
2485       case 0:
2486         /* long options */
2487         break;
2488
2489       default:
2490         usage (EXIT_TROUBLE);
2491         break;
2492
2493       }
2494
  /* Resolve the deferred color setting: color only when stdout is a
     terminal and should_colorize agrees.  */
2495   if (color_option == 2)
2496     color_option = isatty (STDOUT_FILENO) && should_colorize ();
2497   init_colorize ();
2498
2499   /* POSIX says that -q overrides -l, which in turn overrides the
2500      other output options.  */
2501   if (exit_on_match)
2502     list_files = 0;
2503   if (exit_on_match | list_files)
2504     {
2505       count_matches = false;
2506       done_on_match = true;
2507     }
2508   out_quiet = count_matches | done_on_match;
2509
  /* Context lengths not set by -A or -B fall back to DEFAULT_CONTEXT.  */
2510   if (out_after < 0)
2511     out_after = default_context;
2512   if (out_before < 0)
2513     out_before = default_context;
2514
2515   if (color_option)
2516     {
2517       /* Legacy.  */
2518       char *userval = getenv ("GREP_COLOR");
2519       if (userval != NULL && *userval != '\0')
2520         selected_match_color = context_match_color = userval;
2521
2522       /* New GREP_COLORS has priority.  */
2523       parse_grep_colors ();
2524     }
2525
2526   if (show_version)
2527     {
2528       version_etc (stdout, program_name, PACKAGE_NAME, VERSION, AUTHORS,
2529                    (char *) NULL);
2530       return EXIT_SUCCESS;
2531     }
2532
2533   if (show_help)
2534     usage (EXIT_SUCCESS);
2535
  /* Remember stdout's stat data only when stdout is a regular file.  */
2536   struct stat tmp_stat;
2537   if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode))
2538     out_stat = tmp_stat;
2539
2540   if (keys)
2541     {
2542       if (keycc == 0)
2543         {
2544           /* No keys were specified (e.g. -f /dev/null).  Match nothing.  */
2545           out_invert ^= true;
2546           match_lines = match_words = false;
2547         }
2548       else
2549         /* Strip trailing newline. */
2550         --keycc;
2551     }
2552   else if (optind < argc)
2553     {
2554       /* A copy must be made in case of an xrealloc() or free() later.  */
2555       keycc = strlen (argv[optind]);
2556       keys = xmemdup (argv[optind++], keycc + 1);
2557     }
2558   else
2559     usage (EXIT_TROUBLE);
2560
2561   build_mbclen_cache ();
2562   init_easy_encoding ();
2563
2564   /* In a unibyte locale, switch from fgrep to grep if
2565      the pattern matches words (where grep is typically faster).
2566      In a multibyte locale, switch from fgrep to grep if either
2567      (1) case is ignored (where grep is typically faster), or
2568      (2) the pattern has an encoding error (where fgrep might not work).  */
2569   if (compile == Fcompile
2570       && (MB_CUR_MAX <= 1
2571           ? match_words
2572           : match_icase || contains_encoding_error (keys, keycc)))
2573     {
2574       size_t new_keycc;
2575       char *new_keys;
2576       fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys);
2577       free (keys);
2578       keys = new_keys;
2579       keycc = new_keycc;
2580       matcher = "grep";
2581       compile = Gcompile;
2582       execute = EGexecute;
2583     }
2584
2585   compile (keys, keycc);
2586   free (keys);
  /* NOTE(review): the call below appears to probe whether the compiled
     pattern matches an empty line, by running EXECUTE on a buffer that
     holds only the end-of-line byte -- confirm against the
     execute_fp_t contract.  */
2587   /* We need one byte prior and one after.  */
2588   char eolbytes[3] = { 0, eolbyte, 0 };
2589   size_t match_size;
2590   skip_empty_lines = ((execute (eolbytes + 1, 1, &match_size, NULL) == 0)
2591                       == out_invert);
2592
  /* Print file names with more than one operand unless -h was given,
     or whenever -H was given.  */
2593   if ((argc - optind > 1 && !no_filenames) || with_filenames)
2594     out_file = 1;
2595
2596 #ifdef SET_BINARY
2597   /* Output is set to binary mode because we shouldn't convert
2598      NL to CR-LF pairs, especially when grepping binary files.  */
2599   if (!isatty (STDOUT_FILENO))
2600     SET_BINARY (STDOUT_FILENO);
2601 #endif
2602
2603   if (max_count == 0)
2604     return EXIT_FAILURE;
2605
2606   if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
2607     devices = READ_DEVICES;
2608
  /* Choose the operands.  PREPENDED < LAST_RECURSIVE holds only when
     the last recursive option came after the arguments inserted from
     GREP_OPTIONS, that is, from the command line itself.  */
2609   char *const *files;
2610   if (optind < argc)
2611     {
2612       files = argv + optind;
2613     }
2614   else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive)
2615     {
2616       static char *const cwd_only[] = { (char *) ".", NULL };
2617       files = cwd_only;
2618       omit_dot_slash = true;
2619     }
2620   else
2621     {
2622       static char *const stdin_only[] = { (char *) "-", NULL };
2623       files = stdin_only;
2624     }
2625
  /* STATUS remains true only if every call below returns true.  */
2626   bool status = true;
2627   do
2628     status &= grep_command_line_arg (*files++);
2629   while (*files != NULL);
2630
2631   /* We register via atexit() to test stdout.  */
2632   return errseen ? EXIT_TROUBLE : status;
2633 }
2634