1680a9cb8SJohn Marino /* grep.c - main driver file for grep.
2*09d4459fSDaniel Fojt Copyright (C) 1992, 1997-2002, 2004-2020 Free Software Foundation, Inc.
3680a9cb8SJohn Marino
4680a9cb8SJohn Marino This program is free software; you can redistribute it and/or modify
5680a9cb8SJohn Marino it under the terms of the GNU General Public License as published by
6680a9cb8SJohn Marino the Free Software Foundation; either version 3, or (at your option)
7680a9cb8SJohn Marino any later version.
8680a9cb8SJohn Marino
9680a9cb8SJohn Marino This program is distributed in the hope that it will be useful,
10680a9cb8SJohn Marino but WITHOUT ANY WARRANTY; without even the implied warranty of
11680a9cb8SJohn Marino MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12680a9cb8SJohn Marino GNU General Public License for more details.
13680a9cb8SJohn Marino
14680a9cb8SJohn Marino You should have received a copy of the GNU General Public License
15680a9cb8SJohn Marino along with this program; if not, write to the Free Software
16680a9cb8SJohn Marino Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17680a9cb8SJohn Marino 02110-1301, USA. */
18680a9cb8SJohn Marino
19680a9cb8SJohn Marino /* Written July 1992 by Mike Haertel. */
20680a9cb8SJohn Marino
2195b7b453SJohn Marino #include <config.h>
22680a9cb8SJohn Marino #include <sys/types.h>
23680a9cb8SJohn Marino #include <sys/stat.h>
24680a9cb8SJohn Marino #include <wchar.h>
25680a9cb8SJohn Marino #include <inttypes.h>
26*09d4459fSDaniel Fojt #include <stdarg.h>
27680a9cb8SJohn Marino #include <stdio.h>
28680a9cb8SJohn Marino #include "system.h"
29680a9cb8SJohn Marino
30680a9cb8SJohn Marino #include "argmatch.h"
31680a9cb8SJohn Marino #include "c-ctype.h"
32*09d4459fSDaniel Fojt #include "c-stack.h"
33680a9cb8SJohn Marino #include "closeout.h"
34680a9cb8SJohn Marino #include "colorize.h"
35*09d4459fSDaniel Fojt #include "die.h"
36680a9cb8SJohn Marino #include "error.h"
37680a9cb8SJohn Marino #include "exclude.h"
38680a9cb8SJohn Marino #include "exitfail.h"
39680a9cb8SJohn Marino #include "fcntl-safer.h"
40680a9cb8SJohn Marino #include "fts_.h"
41680a9cb8SJohn Marino #include "getopt.h"
42*09d4459fSDaniel Fojt #include "getprogname.h"
43680a9cb8SJohn Marino #include "grep.h"
44680a9cb8SJohn Marino #include "intprops.h"
45680a9cb8SJohn Marino #include "propername.h"
46680a9cb8SJohn Marino #include "quote.h"
47680a9cb8SJohn Marino #include "safe-read.h"
4895b7b453SJohn Marino #include "search.h"
49*09d4459fSDaniel Fojt #include "c-strcase.h"
50680a9cb8SJohn Marino #include "version-etc.h"
51680a9cb8SJohn Marino #include "xalloc.h"
52*09d4459fSDaniel Fojt #include "xbinary-io.h"
53680a9cb8SJohn Marino #include "xstrtol.h"
54680a9cb8SJohn Marino
/* Separator characters for selected and rejected lines, and the default
   group separator string (the initial value of GROUP_SEPARATOR).  */
enum
{
  SEP_CHAR_SELECTED = ':',
  SEP_CHAR_REJECTED = '-'
};
static char const SEP_STR_GROUP[] = "--";
58680a9cb8SJohn Marino
/* When stdout is connected to a regular file, save its stat
   information here, so that we can automatically skip it, thus
   avoiding a potential (racy) infinite loop.  */
static struct stat out_stat;

/* If nonzero, display usage information and exit.  This is an int
   rather than a bool because the "help" entry of LONG_OPTIONS hands
   its address to getopt_long as the option's flag pointer.  */
static int show_help;

/* Print the version on standard output and exit.  */
static bool show_version;

/* Suppress diagnostics for nonexistent or unreadable files.  */
static bool suppress_errors;

/* If nonzero, use color markers.  color_cap_rv_fct sets this to -1,
   which still counts as "color enabled".  */
static int color_option;

/* Show only the part of a line matching the expression.  */
static bool only_matching;

/* If true, make sure first content char in a line is on a tab stop.  */
static bool align_tabs;

/* Print width of line numbers and byte offsets.  Nonzero if ALIGN_TABS.  */
static int offset_width;
84*09d4459fSDaniel Fojt
/* One entry of the FL_PAIR table: the name of a pattern source and
   the 1-based position, among all patterns seen so far, of the first
   pattern taken from that source (fl_add stores N_PATTERNS + 1).  */
struct FL_pair
{
  char const *filename;
  size_t lineno;
};
91*09d4459fSDaniel Fojt
/* A list of filename,lineno pairs, one per source of patterns.
   All patterns are stored concatenated in a single array, KEYS, be
   they from the command line via "-e PAT" or read from one or more
   -f-specified FILENAMES, so this table records where each source
   begins.  Given this invocation,
   grep -f <(seq 5) -f <(seq 2) -f <(seq 3) FILE, there will be three
   entries in FL_PAIR with (lineno, filename) values {1, x} {6, y}
   {8, z}, where x, y and z are just place-holders for shell-generated
   names.  */
static struct FL_pair *fl_pair;

/* The number of entries allocated for FL_PAIR.  */
static size_t n_fl_pair_slots;

/* The number of FL_PAIR entries in use.  Count not only -f-specified
   files, but also individual -e operands and any command-line
   argument that serves as a regular expression.  */
static size_t n_pattern_files;

/* The number of patterns seen so far.
   It is advanced by fl_add and, when needed, used in pattern_file_name
   to derive a file-relative line number.  */
static size_t n_patterns;
109*09d4459fSDaniel Fojt
/* Return the number of newline bytes in BUF with size SIZE.
   BUF need not be NUL-terminated; exactly SIZE bytes are examined.  */
static size_t _GL_ATTRIBUTE_PURE
count_nl_bytes (char const *buf, size_t size)
{
  char const *lim = buf + size;
  size_t count = 0;

  /* Hop from one newline to the next; each search resumes just past
     the previous hit and is bounded by LIM.  */
  for (char const *nl = memchr (buf, '\n', size); nl;
       nl = memchr (nl + 1, '\n', lim - (nl + 1)))
    count++;

  return count;
}
121*09d4459fSDaniel Fojt
122*09d4459fSDaniel Fojt /* Append a FILENAME,line-number pair to FL_PAIR, and update
123*09d4459fSDaniel Fojt pattern-related counts from the contents of BUF with SIZE bytes. */
124*09d4459fSDaniel Fojt static void
fl_add(char const * buf,size_t size,char const * filename)125*09d4459fSDaniel Fojt fl_add (char const *buf, size_t size, char const *filename)
126*09d4459fSDaniel Fojt {
127*09d4459fSDaniel Fojt if (n_fl_pair_slots <= n_pattern_files)
128*09d4459fSDaniel Fojt fl_pair = x2nrealloc (fl_pair, &n_fl_pair_slots, sizeof *fl_pair);
129*09d4459fSDaniel Fojt
130*09d4459fSDaniel Fojt fl_pair[n_pattern_files].lineno = n_patterns + 1;
131*09d4459fSDaniel Fojt fl_pair[n_pattern_files].filename = filename;
132*09d4459fSDaniel Fojt n_pattern_files++;
133*09d4459fSDaniel Fojt n_patterns += count_nl_bytes (buf, size);
134*09d4459fSDaniel Fojt }
135*09d4459fSDaniel Fojt
136*09d4459fSDaniel Fojt /* Map the line number, LINENO, of one of the input patterns to the
137*09d4459fSDaniel Fojt name of the file from which it came. If it was read from stdin
138*09d4459fSDaniel Fojt or if it was specified on the command line, return "-". */
139*09d4459fSDaniel Fojt char const * _GL_ATTRIBUTE_PURE
pattern_file_name(size_t lineno,size_t * new_lineno)140*09d4459fSDaniel Fojt pattern_file_name (size_t lineno, size_t *new_lineno)
141*09d4459fSDaniel Fojt {
142*09d4459fSDaniel Fojt size_t i;
143*09d4459fSDaniel Fojt for (i = 1; i < n_pattern_files; i++)
144*09d4459fSDaniel Fojt {
145*09d4459fSDaniel Fojt if (lineno < fl_pair[i].lineno)
146*09d4459fSDaniel Fojt break;
147*09d4459fSDaniel Fojt }
148*09d4459fSDaniel Fojt
149*09d4459fSDaniel Fojt *new_lineno = lineno - fl_pair[i - 1].lineno + 1;
150*09d4459fSDaniel Fojt return fl_pair[i - 1].filename;
151*09d4459fSDaniel Fojt }
152*09d4459fSDaniel Fojt
#if HAVE_ASAN
/* Record the starting address and length of the sole poisoned region,
   so that we can unpoison it later, just before each following read.  */
static void const *poison_buf;
static size_t poison_len;

/* Unpoison the region recorded by the last asan_poison call, if any.  */
static void
clear_asan_poison (void)
{
  if (poison_buf)
    __asan_unpoison_memory_region (poison_buf, poison_len);
}

/* Poison the SIZE bytes starting at ADDR, remembering the region so
   that clear_asan_poison can undo it.  */
static void
asan_poison (void const *addr, size_t size)
{
  poison_buf = addr;
  poison_len = size;

  __asan_poison_memory_region (poison_buf, poison_len);
}
#else
/* Without ASAN, both operations are no-ops.  */
static void clear_asan_poison (void) { }
static void asan_poison (void const volatile *addr, size_t size) { }
#endif
178680a9cb8SJohn Marino
179680a9cb8SJohn Marino /* The group separator used when context is requested. */
180680a9cb8SJohn Marino static const char *group_separator = SEP_STR_GROUP;
181680a9cb8SJohn Marino
182680a9cb8SJohn Marino /* The context and logic for choosing default --color screen attributes
183680a9cb8SJohn Marino (foreground and background colors, etc.) are the following.
184680a9cb8SJohn Marino -- There are eight basic colors available, each with its own
185680a9cb8SJohn Marino nominal luminosity to the human eye and foreground/background
186680a9cb8SJohn Marino codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41],
187680a9cb8SJohn Marino magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46],
188680a9cb8SJohn Marino yellow [89 %, 33/43], and white [100 %, 37/47]).
189680a9cb8SJohn Marino -- Sometimes, white as a background is actually implemented using
190680a9cb8SJohn Marino a shade of light gray, so that a foreground white can be visible
191680a9cb8SJohn Marino on top of it (but most often not).
192680a9cb8SJohn Marino -- Sometimes, black as a foreground is actually implemented using
193680a9cb8SJohn Marino a shade of dark gray, so that it can be visible on top of a
194680a9cb8SJohn Marino background black (but most often not).
195680a9cb8SJohn Marino -- Sometimes, more colors are available, as extensions.
196680a9cb8SJohn Marino -- Other attributes can be selected/deselected (bold [1/22],
197680a9cb8SJohn Marino underline [4/24], standout/inverse [7/27], blink [5/25], and
198680a9cb8SJohn Marino invisible/hidden [8/28]). They are sometimes implemented by
199680a9cb8SJohn Marino using colors instead of what their names imply; e.g., bold is
200680a9cb8SJohn Marino often achieved by using brighter colors. In practice, only bold
201680a9cb8SJohn Marino is really available to us, underline sometimes being mapped by
202680a9cb8SJohn Marino the terminal to some strange color choice, and standout best
203680a9cb8SJohn Marino being left for use by downstream programs such as less(1).
204680a9cb8SJohn Marino -- We cannot assume that any of the extensions or special features
205680a9cb8SJohn Marino are available for the purpose of choosing defaults for everyone.
206680a9cb8SJohn Marino -- The most prevalent default terminal backgrounds are pure black
207680a9cb8SJohn Marino and pure white, and are not necessarily the same shades of
208680a9cb8SJohn Marino those as if they were selected explicitly with SGR sequences.
209680a9cb8SJohn Marino Some terminals use dark or light pictures as default background,
210680a9cb8SJohn Marino but those are covered over by an explicit selection of background
211680a9cb8SJohn Marino color with an SGR sequence; their users will appreciate their
212680a9cb8SJohn Marino background pictures not being covered like this, if possible.
213680a9cb8SJohn Marino -- Some uses of color attributes are to make some output items
214680a9cb8SJohn Marino more understated (e.g., context lines); this cannot be achieved
215680a9cb8SJohn Marino by changing the background color.
216680a9cb8SJohn Marino -- For these reasons, the grep color defaults should strive not
217680a9cb8SJohn Marino to change the background color from its default, unless it's
218680a9cb8SJohn Marino for a short item that should be highlighted, not understated.
219680a9cb8SJohn Marino -- The grep foreground color defaults (without an explicitly set
220680a9cb8SJohn Marino background) should provide enough contrast to be readable on any
221680a9cb8SJohn Marino terminal with either a black (dark) or white (light) background.
222680a9cb8SJohn Marino This only leaves red, magenta, green, and cyan (and their bold
223680a9cb8SJohn Marino counterparts) and possibly bold blue. */
/* The color strings used for matched text.
   The user can overwrite them using the deprecated
   environment variable GREP_COLOR or the new GREP_COLORS.
   Each string is the parameter part of an SGR sequence.  */
static const char *selected_match_color = "01;31";	/* bold red */
static const char *context_match_color  = "01;31";	/* bold red */

/* Other colors.  An empty string means the default color pair, and
   makes pr_sgr_start and pr_sgr_end print nothing.  */
static const char *filename_color = "35";	/* magenta */
static const char *line_num_color = "32";	/* green */
static const char *byte_num_color = "32";	/* green */
static const char *sep_color      = "36";	/* cyan */
static const char *selected_line_color = "";	/* default color pair */
static const char *context_line_color  = "";	/* default color pair */
237680a9cb8SJohn Marino
238680a9cb8SJohn Marino /* Select Graphic Rendition (SGR, "\33[...m") strings. */
239680a9cb8SJohn Marino /* Also Erase in Line (EL) to Right ("\33[K") by default. */
240680a9cb8SJohn Marino /* Why have EL to Right after SGR?
241680a9cb8SJohn Marino -- The behavior of line-wrapping when at the bottom of the
242680a9cb8SJohn Marino terminal screen and at the end of the current line is often
243680a9cb8SJohn Marino such that a new line is introduced, entirely cleared with
244680a9cb8SJohn Marino the current background color which may be different from the
245680a9cb8SJohn Marino default one (see the boolean back_color_erase terminfo(5)
246680a9cb8SJohn Marino capability), thus scrolling the display by one line.
247680a9cb8SJohn Marino The end of this new line will stay in this background color
248680a9cb8SJohn Marino even after reverting to the default background color with
249680a9cb8SJohn Marino "\33[m", unless it is explicitly cleared again with "\33[K"
250680a9cb8SJohn Marino (which is the behavior the user would instinctively expect
251680a9cb8SJohn Marino from the whole thing). There may be some unavoidable
252680a9cb8SJohn Marino background-color flicker at the end of this new line because
253680a9cb8SJohn Marino of this (when timing with the monitor's redraw is just right).
254680a9cb8SJohn Marino -- The behavior of HT (tab, "\t") is usually the same as that of
255680a9cb8SJohn Marino Cursor Forward Tabulation (CHT) with a default parameter
256680a9cb8SJohn Marino of 1 ("\33[I"), i.e., it performs pure movement to the next
257680a9cb8SJohn Marino tab stop, without any clearing of either content or screen
258680a9cb8SJohn Marino attributes (including background color); try
259680a9cb8SJohn Marino printf 'asdfqwerzxcv\rASDF\tZXCV\n'
260680a9cb8SJohn Marino in a bash(1) shell to demonstrate this. This is not what the
261680a9cb8SJohn Marino user would instinctively expect of HT (but is ok for CHT).
262680a9cb8SJohn Marino The instinctive behavior would include clearing the terminal
263680a9cb8SJohn Marino cells that are skipped over by HT with blank cells in the
264680a9cb8SJohn Marino current screen attributes, including background color;
265680a9cb8SJohn Marino the boolean dest_tabs_magic_smso terminfo(5) capability
266680a9cb8SJohn Marino indicates this saner behavior for HT, but only some rare
267680a9cb8SJohn Marino terminals have it (although it also indicates a special
268680a9cb8SJohn Marino glitch with standout mode in the Teleray terminal for which
269680a9cb8SJohn Marino it was initially introduced). The remedy is to add "\33[K"
270680a9cb8SJohn Marino after each SGR sequence, be it START (to fix the behavior
271680a9cb8SJohn Marino of any HT after that before another SGR) or END (to fix the
272680a9cb8SJohn Marino behavior of an HT in default background color that would
273680a9cb8SJohn Marino follow a line-wrapping at the bottom of the screen in another
274680a9cb8SJohn Marino background color, and to complement doing it after START).
275680a9cb8SJohn Marino Piping grep's output through a pager such as less(1) avoids
276680a9cb8SJohn Marino any HT problems since the pager performs tab expansion.
277680a9cb8SJohn Marino
278680a9cb8SJohn Marino Generic disadvantages of this remedy are:
279680a9cb8SJohn Marino -- Some very rare terminals might support SGR but not EL (nobody
280680a9cb8SJohn Marino will use "grep --color" on a terminal that does not support
281680a9cb8SJohn Marino SGR in the first place).
282680a9cb8SJohn Marino -- Having these extra control sequences might somewhat complicate
283680a9cb8SJohn Marino the task of any program trying to parse "grep --color"
284680a9cb8SJohn Marino output in order to extract structuring information from it.
285680a9cb8SJohn Marino A specific disadvantage to doing it after SGR START is:
286680a9cb8SJohn Marino -- Even more possible background color flicker (when timing
287680a9cb8SJohn Marino with the monitor's redraw is just right), even when not at the
288680a9cb8SJohn Marino bottom of the screen.
289680a9cb8SJohn Marino There are no additional disadvantages specific to doing it after
290680a9cb8SJohn Marino SGR END.
291680a9cb8SJohn Marino
292680a9cb8SJohn Marino It would be impractical for GNU grep to become a full-fledged
293680a9cb8SJohn Marino terminal program linked against ncurses or the like, so it will
294680a9cb8SJohn Marino not detect terminfo(5) capabilities. */
/* Formats for starting and ending a colored region.  SGR_START takes
   one %s argument, the SGR parameter string.  By default each is
   followed by EL to Right ("\33[K"); color_cap_ne_fct replaces both
   with variants that omit it.  */
static const char *sgr_start = "\33[%sm\33[K";
static const char *sgr_end   = "\33[m\33[K";
297680a9cb8SJohn Marino
298680a9cb8SJohn Marino /* SGR utility functions. */
299680a9cb8SJohn Marino static void
pr_sgr_start(char const * s)300680a9cb8SJohn Marino pr_sgr_start (char const *s)
301680a9cb8SJohn Marino {
302680a9cb8SJohn Marino if (*s)
303680a9cb8SJohn Marino print_start_colorize (sgr_start, s);
304680a9cb8SJohn Marino }
305680a9cb8SJohn Marino static void
pr_sgr_end(char const * s)306680a9cb8SJohn Marino pr_sgr_end (char const *s)
307680a9cb8SJohn Marino {
308680a9cb8SJohn Marino if (*s)
309680a9cb8SJohn Marino print_end_colorize (sgr_end);
310680a9cb8SJohn Marino }
311680a9cb8SJohn Marino static void
pr_sgr_start_if(char const * s)312680a9cb8SJohn Marino pr_sgr_start_if (char const *s)
313680a9cb8SJohn Marino {
314680a9cb8SJohn Marino if (color_option)
315680a9cb8SJohn Marino pr_sgr_start (s);
316680a9cb8SJohn Marino }
317680a9cb8SJohn Marino static void
pr_sgr_end_if(char const * s)318680a9cb8SJohn Marino pr_sgr_end_if (char const *s)
319680a9cb8SJohn Marino {
320680a9cb8SJohn Marino if (color_option)
321680a9cb8SJohn Marino pr_sgr_end (s);
322680a9cb8SJohn Marino }
323680a9cb8SJohn Marino
/* One GREP_COLORS capability: its NAME, the color variable it sets
   (VAR, or NULL if none), and an optional function FCT associated
   with the capability (or NULL).  See COLOR_DICT for the entries.  */
struct color_cap
{
  const char *name;
  const char **var;
  void (*fct) (void);
};
330680a9cb8SJohn Marino
331680a9cb8SJohn Marino static void
color_cap_mt_fct(void)332680a9cb8SJohn Marino color_cap_mt_fct (void)
333680a9cb8SJohn Marino {
334680a9cb8SJohn Marino /* Our caller just set selected_match_color. */
335680a9cb8SJohn Marino context_match_color = selected_match_color;
336680a9cb8SJohn Marino }
337680a9cb8SJohn Marino
338680a9cb8SJohn Marino static void
color_cap_rv_fct(void)339680a9cb8SJohn Marino color_cap_rv_fct (void)
340680a9cb8SJohn Marino {
341680a9cb8SJohn Marino /* By this point, it was 1 (or already -1). */
342680a9cb8SJohn Marino color_option = -1; /* That's still != 0. */
343680a9cb8SJohn Marino }
344680a9cb8SJohn Marino
345680a9cb8SJohn Marino static void
color_cap_ne_fct(void)346680a9cb8SJohn Marino color_cap_ne_fct (void)
347680a9cb8SJohn Marino {
348680a9cb8SJohn Marino sgr_start = "\33[%sm";
349680a9cb8SJohn Marino sgr_end = "\33[m";
350680a9cb8SJohn Marino }
351680a9cb8SJohn Marino
352680a9cb8SJohn Marino /* For GREP_COLORS. */
353680a9cb8SJohn Marino static const struct color_cap color_dict[] =
354680a9cb8SJohn Marino {
355680a9cb8SJohn Marino { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
356680a9cb8SJohn Marino { "ms", &selected_match_color, NULL }, /* selected matched text */
357680a9cb8SJohn Marino { "mc", &context_match_color, NULL }, /* context matched text */
358680a9cb8SJohn Marino { "fn", &filename_color, NULL }, /* filename */
359680a9cb8SJohn Marino { "ln", &line_num_color, NULL }, /* line number */
360680a9cb8SJohn Marino { "bn", &byte_num_color, NULL }, /* byte (sic) offset */
361680a9cb8SJohn Marino { "se", &sep_color, NULL }, /* separator */
362680a9cb8SJohn Marino { "sl", &selected_line_color, NULL }, /* selected lines */
363680a9cb8SJohn Marino { "cx", &context_line_color, NULL }, /* context lines */
364680a9cb8SJohn Marino { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */
365680a9cb8SJohn Marino { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */
366680a9cb8SJohn Marino { NULL, NULL, NULL }
367680a9cb8SJohn Marino };
368680a9cb8SJohn Marino
/* Saved errno value from failed output functions on stdout.  It is
   set by the *_errno wrappers below and stays zero while none of
   them has failed.  */
static int stdout_errno;
371*09d4459fSDaniel Fojt
372*09d4459fSDaniel Fojt static void
putchar_errno(int c)373*09d4459fSDaniel Fojt putchar_errno (int c)
374*09d4459fSDaniel Fojt {
375*09d4459fSDaniel Fojt if (putchar (c) < 0)
376*09d4459fSDaniel Fojt stdout_errno = errno;
377*09d4459fSDaniel Fojt }
378*09d4459fSDaniel Fojt
379*09d4459fSDaniel Fojt static void
fputs_errno(char const * s)380*09d4459fSDaniel Fojt fputs_errno (char const *s)
381*09d4459fSDaniel Fojt {
382*09d4459fSDaniel Fojt if (fputs (s, stdout) < 0)
383*09d4459fSDaniel Fojt stdout_errno = errno;
384*09d4459fSDaniel Fojt }
385*09d4459fSDaniel Fojt
386*09d4459fSDaniel Fojt static void _GL_ATTRIBUTE_FORMAT_PRINTF (1, 2)
printf_errno(char const * format,...)387*09d4459fSDaniel Fojt printf_errno (char const *format, ...)
388*09d4459fSDaniel Fojt {
389*09d4459fSDaniel Fojt va_list ap;
390*09d4459fSDaniel Fojt va_start (ap, format);
391*09d4459fSDaniel Fojt if (vfprintf (stdout, format, ap) < 0)
392*09d4459fSDaniel Fojt stdout_errno = errno;
393*09d4459fSDaniel Fojt va_end (ap);
394*09d4459fSDaniel Fojt }
395*09d4459fSDaniel Fojt
396*09d4459fSDaniel Fojt static void
fwrite_errno(void const * ptr,size_t size,size_t nmemb)397*09d4459fSDaniel Fojt fwrite_errno (void const *ptr, size_t size, size_t nmemb)
398*09d4459fSDaniel Fojt {
399*09d4459fSDaniel Fojt if (fwrite (ptr, size, nmemb, stdout) != nmemb)
400*09d4459fSDaniel Fojt stdout_errno = errno;
401*09d4459fSDaniel Fojt }
402*09d4459fSDaniel Fojt
403*09d4459fSDaniel Fojt static void
fflush_errno(void)404*09d4459fSDaniel Fojt fflush_errno (void)
405*09d4459fSDaniel Fojt {
406*09d4459fSDaniel Fojt if (fflush (stdout) != 0)
407*09d4459fSDaniel Fojt stdout_errno = errno;
408*09d4459fSDaniel Fojt }
409*09d4459fSDaniel Fojt
/* Exclusion pattern sets for files and for directories.
   NOTE(review): the meaning of the two slots in each array is
   established by code outside this section -- confirm there.  */
static struct exclude *excluded_patterns[2];
static struct exclude *excluded_directory_patterns[2];
/* Short options, in getopt notation: a letter followed by ':' takes
   an argument.  */
static char const short_options[] =
"0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz";
415680a9cb8SJohn Marino
/* Non-boolean long options that have no corresponding short equivalents.
   The values start just above CHAR_MAX so that they cannot collide
   with any short option character.  */
enum
{
  BINARY_FILES_OPTION = CHAR_MAX + 1,
  COLOR_OPTION,
  EXCLUDE_DIRECTORY_OPTION,
  EXCLUDE_OPTION,
  EXCLUDE_FROM_OPTION,
  GROUP_SEPARATOR_OPTION,
  INCLUDE_OPTION,
  LINE_BUFFERED_OPTION,
  LABEL_OPTION,
  NO_IGNORE_CASE_OPTION
};
430680a9cb8SJohn Marino
431680a9cb8SJohn Marino /* Long options equivalences. */
432680a9cb8SJohn Marino static struct option const long_options[] =
433680a9cb8SJohn Marino {
434680a9cb8SJohn Marino {"basic-regexp", no_argument, NULL, 'G'},
435680a9cb8SJohn Marino {"extended-regexp", no_argument, NULL, 'E'},
436680a9cb8SJohn Marino {"fixed-regexp", no_argument, NULL, 'F'},
437680a9cb8SJohn Marino {"fixed-strings", no_argument, NULL, 'F'},
438680a9cb8SJohn Marino {"perl-regexp", no_argument, NULL, 'P'},
439680a9cb8SJohn Marino {"after-context", required_argument, NULL, 'A'},
440680a9cb8SJohn Marino {"before-context", required_argument, NULL, 'B'},
441680a9cb8SJohn Marino {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
442680a9cb8SJohn Marino {"byte-offset", no_argument, NULL, 'b'},
443680a9cb8SJohn Marino {"context", required_argument, NULL, 'C'},
444680a9cb8SJohn Marino {"color", optional_argument, NULL, COLOR_OPTION},
445680a9cb8SJohn Marino {"colour", optional_argument, NULL, COLOR_OPTION},
446680a9cb8SJohn Marino {"count", no_argument, NULL, 'c'},
447680a9cb8SJohn Marino {"devices", required_argument, NULL, 'D'},
448680a9cb8SJohn Marino {"directories", required_argument, NULL, 'd'},
449680a9cb8SJohn Marino {"exclude", required_argument, NULL, EXCLUDE_OPTION},
450680a9cb8SJohn Marino {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
451680a9cb8SJohn Marino {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
452680a9cb8SJohn Marino {"file", required_argument, NULL, 'f'},
453680a9cb8SJohn Marino {"files-with-matches", no_argument, NULL, 'l'},
454680a9cb8SJohn Marino {"files-without-match", no_argument, NULL, 'L'},
455680a9cb8SJohn Marino {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
456680a9cb8SJohn Marino {"help", no_argument, &show_help, 1},
457680a9cb8SJohn Marino {"include", required_argument, NULL, INCLUDE_OPTION},
458680a9cb8SJohn Marino {"ignore-case", no_argument, NULL, 'i'},
459*09d4459fSDaniel Fojt {"no-ignore-case", no_argument, NULL, NO_IGNORE_CASE_OPTION},
460680a9cb8SJohn Marino {"initial-tab", no_argument, NULL, 'T'},
461680a9cb8SJohn Marino {"label", required_argument, NULL, LABEL_OPTION},
462680a9cb8SJohn Marino {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
463680a9cb8SJohn Marino {"line-number", no_argument, NULL, 'n'},
464680a9cb8SJohn Marino {"line-regexp", no_argument, NULL, 'x'},
465680a9cb8SJohn Marino {"max-count", required_argument, NULL, 'm'},
466680a9cb8SJohn Marino
467680a9cb8SJohn Marino {"no-filename", no_argument, NULL, 'h'},
468680a9cb8SJohn Marino {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
469680a9cb8SJohn Marino {"no-messages", no_argument, NULL, 's'},
470680a9cb8SJohn Marino {"null", no_argument, NULL, 'Z'},
471680a9cb8SJohn Marino {"null-data", no_argument, NULL, 'z'},
472680a9cb8SJohn Marino {"only-matching", no_argument, NULL, 'o'},
473680a9cb8SJohn Marino {"quiet", no_argument, NULL, 'q'},
474680a9cb8SJohn Marino {"recursive", no_argument, NULL, 'r'},
475680a9cb8SJohn Marino {"dereference-recursive", no_argument, NULL, 'R'},
476680a9cb8SJohn Marino {"regexp", required_argument, NULL, 'e'},
477680a9cb8SJohn Marino {"invert-match", no_argument, NULL, 'v'},
478680a9cb8SJohn Marino {"silent", no_argument, NULL, 'q'},
479680a9cb8SJohn Marino {"text", no_argument, NULL, 'a'},
480680a9cb8SJohn Marino {"binary", no_argument, NULL, 'U'},
481680a9cb8SJohn Marino {"unix-byte-offsets", no_argument, NULL, 'u'},
482680a9cb8SJohn Marino {"version", no_argument, NULL, 'V'},
483680a9cb8SJohn Marino {"with-filename", no_argument, NULL, 'H'},
484680a9cb8SJohn Marino {"word-regexp", no_argument, NULL, 'w'},
485680a9cb8SJohn Marino {0, 0, 0, 0}
486680a9cb8SJohn Marino };
487680a9cb8SJohn Marino
/* Define flags declared in grep.h.  These have external linkage.  */
bool match_icase;
bool match_words;
bool match_lines;
char eolbyte;

/* For error messages.  */
/* The input file name, or (if standard input) null or a --label argument.  */
static char const *filename;
/* Omit leading "./" from file names in diagnostics.  */
static bool omit_dot_slash;
/* NOTE(review): presumably true once an error has been reported;
   the code that sets it is outside this section.  */
static bool errseen;

/* True if output from the current input file has been suppressed
   because an output line had an encoding error.  */
static bool encoding_error_output;
504680a9cb8SJohn Marino
/* The ways a directory operand can be handled.  Numbering starts
   at 2.  */
enum directories_type
  {
    READ_DIRECTORIES = 2,
    RECURSE_DIRECTORIES,
    SKIP_DIRECTORIES
  };
511680a9cb8SJohn Marino
512680a9cb8SJohn Marino /* How to handle directories. */
513680a9cb8SJohn Marino static char const *const directories_args[] =
514680a9cb8SJohn Marino {
515680a9cb8SJohn Marino "read", "recurse", "skip", NULL
516680a9cb8SJohn Marino };
517680a9cb8SJohn Marino static enum directories_type const directories_types[] =
518680a9cb8SJohn Marino {
519680a9cb8SJohn Marino READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES
520680a9cb8SJohn Marino };
521680a9cb8SJohn Marino ARGMATCH_VERIFY (directories_args, directories_types);
522680a9cb8SJohn Marino
523680a9cb8SJohn Marino static enum directories_type directories = READ_DIRECTORIES;
524680a9cb8SJohn Marino
525680a9cb8SJohn Marino enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK };
526680a9cb8SJohn Marino static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL;
527680a9cb8SJohn Marino
/* How to handle devices.  The default, READ_COMMAND_LINE_DEVICES,
   makes skip_devices skip only those devices that were not named on
   the command line.  */
static enum
  {
    READ_COMMAND_LINE_DEVICES,
    READ_DEVICES,
    SKIP_DEVICES
  } devices = READ_COMMAND_LINE_DEVICES;
535680a9cb8SJohn Marino
/* Forward declarations of functions defined later in this file.  */
static bool grepfile (int, char const *, bool, bool);
static bool grepdesc (int, bool);
538680a9cb8SJohn Marino
/* Return true if the file mode M denotes what this file treats as a
   "device": a character or block special file, a socket, or a FIFO.  */
static bool
is_device_mode (mode_t m)
{
  return S_ISCHR (m) || S_ISBLK (m) || S_ISSOCK (m) || S_ISFIFO (m);
}
544680a9cb8SJohn Marino
545dc7c36e4SJohn Marino static bool
skip_devices(bool command_line)546dc7c36e4SJohn Marino skip_devices (bool command_line)
547dc7c36e4SJohn Marino {
548dc7c36e4SJohn Marino return (devices == SKIP_DEVICES
549*09d4459fSDaniel Fojt || ((devices == READ_COMMAND_LINE_DEVICES) & !command_line));
550dc7c36e4SJohn Marino }
551dc7c36e4SJohn Marino
/* Return true if ST->st_size is defined, which this function takes to
   be the case for a regular file, a shared memory object, or a typed
   memory object.  Assume the file is not a symbolic link.  */
static bool
usable_st_size (struct stat const *st)
{
  return S_ISREG (st->st_mode) || S_TYPEISSHM (st) || S_TYPEISTMO (st);
}
559680a9cb8SJohn Marino
/* Lame substitutes for SEEK_DATA and SEEK_HOLE on platforms lacking them.
   Do not rely on these finding data or holes if they equal SEEK_SET;
   callers test 'SEEK_DATA != SEEK_SET' (likewise SEEK_HOLE) before
   depending on the real semantics.  */
#ifndef SEEK_DATA
enum { SEEK_DATA = SEEK_SET };
#endif
#ifndef SEEK_HOLE
enum { SEEK_HOLE = SEEK_SET };
#endif
568dc7c36e4SJohn Marino
/* True if lseek with SEEK_CUR or SEEK_DATA, respectively, failed on the
   current input.  Both are reassigned by reset for each new input.  */
static bool seek_failed;
static bool seek_data_failed;
572680a9cb8SJohn Marino
573*09d4459fSDaniel Fojt /* Functions we'll use to search. */
574*09d4459fSDaniel Fojt typedef void *(*compile_fp_t) (char *, size_t, reg_syntax_t);
575*09d4459fSDaniel Fojt typedef size_t (*execute_fp_t) (void *, char const *, size_t, size_t *,
576*09d4459fSDaniel Fojt char const *);
577*09d4459fSDaniel Fojt static execute_fp_t execute;
578*09d4459fSDaniel Fojt static void *compiled_pattern;
579*09d4459fSDaniel Fojt
580*09d4459fSDaniel Fojt static char const *
input_filename(void)581*09d4459fSDaniel Fojt input_filename (void)
582*09d4459fSDaniel Fojt {
583*09d4459fSDaniel Fojt if (!filename)
584*09d4459fSDaniel Fojt filename = _("(standard input)");
585*09d4459fSDaniel Fojt return filename;
586*09d4459fSDaniel Fojt }
587*09d4459fSDaniel Fojt
588*09d4459fSDaniel Fojt /* Unless requested, diagnose an error about the input file. */
589680a9cb8SJohn Marino static void
suppressible_error(int errnum)590*09d4459fSDaniel Fojt suppressible_error (int errnum)
591680a9cb8SJohn Marino {
592680a9cb8SJohn Marino if (! suppress_errors)
593*09d4459fSDaniel Fojt error (0, errnum, "%s", input_filename ());
594dc7c36e4SJohn Marino errseen = true;
595680a9cb8SJohn Marino }
596680a9cb8SJohn Marino
597680a9cb8SJohn Marino /* If there has already been a write error, don't bother closing
598680a9cb8SJohn Marino standard output, as that might elicit a duplicate diagnostic. */
599680a9cb8SJohn Marino static void
clean_up_stdout(void)600680a9cb8SJohn Marino clean_up_stdout (void)
601680a9cb8SJohn Marino {
602*09d4459fSDaniel Fojt if (! stdout_errno)
603680a9cb8SJohn Marino close_stdout ();
604680a9cb8SJohn Marino }
605680a9cb8SJohn Marino
/* A cast to TYPE of VAL.  Use this when TYPE is a pointer type, VAL
   is properly aligned for TYPE, and 'gcc -Wcast-align' cannot infer
   the alignment and would otherwise complain about the cast.

   The version test below holds for GCC 4.6 and later and for any
   GCC 5 or newer; other compilers get a plain cast.  The GCC form
   evaluates VAL exactly once and silences -Wcast-align only around
   the cast itself.  */
#if 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
# define CAST_ALIGNED(type, val)                           \
    ({ __typeof__ (val) val_ = val;                        \
       _Pragma ("GCC diagnostic push")                     \
       _Pragma ("GCC diagnostic ignored \"-Wcast-align\"") \
       (type) val_;                                        \
       _Pragma ("GCC diagnostic pop")                      \
    })
#else
# define CAST_ALIGNED(type, val) ((type) (val))
#endif
620680a9cb8SJohn Marino
621dc7c36e4SJohn Marino /* An unsigned type suitable for fast matching. */
622dc7c36e4SJohn Marino typedef uintmax_t uword;
623680a9cb8SJohn Marino
624*09d4459fSDaniel Fojt struct localeinfo localeinfo;
625*09d4459fSDaniel Fojt
626*09d4459fSDaniel Fojt /* A mask to test for unibyte characters, with the pattern repeated to
627*09d4459fSDaniel Fojt fill a uword. For a multibyte character encoding where
628*09d4459fSDaniel Fojt all bytes are unibyte characters, this is 0. For UTF-8, this is
629*09d4459fSDaniel Fojt 0x808080.... For encodings where unibyte characters have no discerned
630*09d4459fSDaniel Fojt pattern, this is all 1s. The unsigned char C is a unibyte
631*09d4459fSDaniel Fojt character if C & UNIBYTE_MASK is zero. If the uword W is the
632*09d4459fSDaniel Fojt concatenation of bytes, the bytes are all unibyte characters
633*09d4459fSDaniel Fojt if W & UNIBYTE_MASK is zero. */
634*09d4459fSDaniel Fojt static uword unibyte_mask;
635*09d4459fSDaniel Fojt
636*09d4459fSDaniel Fojt static void
initialize_unibyte_mask(void)637*09d4459fSDaniel Fojt initialize_unibyte_mask (void)
638*09d4459fSDaniel Fojt {
639*09d4459fSDaniel Fojt /* For each encoding error I that MASK does not already match,
640*09d4459fSDaniel Fojt accumulate I's most significant 1 bit by ORing it into MASK.
641*09d4459fSDaniel Fojt Although any 1 bit of I could be used, in practice high-order
642*09d4459fSDaniel Fojt bits work better. */
643*09d4459fSDaniel Fojt unsigned char mask = 0;
644*09d4459fSDaniel Fojt int ms1b = 1;
645*09d4459fSDaniel Fojt for (int i = 1; i <= UCHAR_MAX; i++)
646*09d4459fSDaniel Fojt if ((localeinfo.sbclen[i] != 1) & ! (mask & i))
647*09d4459fSDaniel Fojt {
648*09d4459fSDaniel Fojt while (ms1b * 2 <= i)
649*09d4459fSDaniel Fojt ms1b *= 2;
650*09d4459fSDaniel Fojt mask |= ms1b;
651*09d4459fSDaniel Fojt }
652*09d4459fSDaniel Fojt
653*09d4459fSDaniel Fojt /* Now MASK will detect any encoding-error byte, although it may
654*09d4459fSDaniel Fojt cry wolf and it may not be optimal. Build a uword-length mask by
655*09d4459fSDaniel Fojt repeating MASK. */
656*09d4459fSDaniel Fojt uword uword_max = -1;
657*09d4459fSDaniel Fojt unibyte_mask = uword_max / UCHAR_MAX * mask;
658*09d4459fSDaniel Fojt }
659*09d4459fSDaniel Fojt
660dc7c36e4SJohn Marino /* Skip the easy bytes in a buffer that is guaranteed to have a sentinel
661dc7c36e4SJohn Marino that is not easy, and return a pointer to the first non-easy byte.
662*09d4459fSDaniel Fojt The easy bytes all have UNIBYTE_MASK off. */
663dc7c36e4SJohn Marino static char const * _GL_ATTRIBUTE_PURE
skip_easy_bytes(char const * buf)664dc7c36e4SJohn Marino skip_easy_bytes (char const *buf)
665dc7c36e4SJohn Marino {
666dc7c36e4SJohn Marino /* Search a byte at a time until the pointer is aligned, then a
667dc7c36e4SJohn Marino uword at a time until a match is found, then a byte at a time to
668dc7c36e4SJohn Marino identify the exact byte. The uword search may go slightly past
669dc7c36e4SJohn Marino the buffer end, but that's benign. */
670dc7c36e4SJohn Marino char const *p;
671dc7c36e4SJohn Marino uword const *s;
672dc7c36e4SJohn Marino for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++)
673*09d4459fSDaniel Fojt if (to_uchar (*p) & unibyte_mask)
674dc7c36e4SJohn Marino return p;
675*09d4459fSDaniel Fojt for (s = CAST_ALIGNED (uword const *, p); ! (*s & unibyte_mask); s++)
676dc7c36e4SJohn Marino continue;
677*09d4459fSDaniel Fojt for (p = (char const *) s; ! (to_uchar (*p) & unibyte_mask); p++)
678dc7c36e4SJohn Marino continue;
679dc7c36e4SJohn Marino return p;
680dc7c36e4SJohn Marino }
681dc7c36e4SJohn Marino
682*09d4459fSDaniel Fojt /* Return true if BUF, of size SIZE, has an encoding error.
683dc7c36e4SJohn Marino BUF must be followed by at least sizeof (uword) bytes,
684*09d4459fSDaniel Fojt the first of which may be modified. */
685*09d4459fSDaniel Fojt static bool
buf_has_encoding_errors(char * buf,size_t size)686*09d4459fSDaniel Fojt buf_has_encoding_errors (char *buf, size_t size)
687dc7c36e4SJohn Marino {
688*09d4459fSDaniel Fojt if (! unibyte_mask)
689*09d4459fSDaniel Fojt return false;
690dc7c36e4SJohn Marino
691dc7c36e4SJohn Marino mbstate_t mbs = { 0 };
692dc7c36e4SJohn Marino size_t clen;
693dc7c36e4SJohn Marino
694dc7c36e4SJohn Marino buf[size] = -1;
695*09d4459fSDaniel Fojt for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
696dc7c36e4SJohn Marino {
697dc7c36e4SJohn Marino clen = mbrlen (p, buf + size - p, &mbs);
698dc7c36e4SJohn Marino if ((size_t) -2 <= clen)
699*09d4459fSDaniel Fojt return true;
700dc7c36e4SJohn Marino }
701dc7c36e4SJohn Marino
702*09d4459fSDaniel Fojt return false;
703dc7c36e4SJohn Marino }
704dc7c36e4SJohn Marino
705*09d4459fSDaniel Fojt
/* Return true if BUF, of size SIZE, has a null byte.
   BUF must be followed by at least one byte,
   which may be arbitrarily written to or read from.  */
static bool
buf_has_nulls (char *buf, size_t size)
{
  /* Terminate the data with a null byte; strlen then stops short of
     SIZE exactly when an earlier null byte exists inside the data.  */
  buf[size] = 0;
  return strlen (buf) != size;
}
715dc7c36e4SJohn Marino
716*09d4459fSDaniel Fojt /* Return true if a file is known to contain null bytes.
717*09d4459fSDaniel Fojt SIZE bytes have already been read from the file
718*09d4459fSDaniel Fojt with descriptor FD and status ST. */
719*09d4459fSDaniel Fojt static bool
file_must_have_nulls(size_t size,int fd,struct stat const * st)720*09d4459fSDaniel Fojt file_must_have_nulls (size_t size, int fd, struct stat const *st)
721dc7c36e4SJohn Marino {
722680a9cb8SJohn Marino /* If the file has holes, it must contain a null byte somewhere. */
723*09d4459fSDaniel Fojt if (SEEK_HOLE != SEEK_SET && !seek_failed
724*09d4459fSDaniel Fojt && usable_st_size (st) && size < st->st_size)
725680a9cb8SJohn Marino {
726dc7c36e4SJohn Marino off_t cur = size;
727680a9cb8SJohn Marino if (O_BINARY || fd == STDIN_FILENO)
728680a9cb8SJohn Marino {
729680a9cb8SJohn Marino cur = lseek (fd, 0, SEEK_CUR);
730680a9cb8SJohn Marino if (cur < 0)
731*09d4459fSDaniel Fojt return false;
732680a9cb8SJohn Marino }
733680a9cb8SJohn Marino
734680a9cb8SJohn Marino /* Look for a hole after the current location. */
735680a9cb8SJohn Marino off_t hole_start = lseek (fd, cur, SEEK_HOLE);
736680a9cb8SJohn Marino if (0 <= hole_start)
737680a9cb8SJohn Marino {
738680a9cb8SJohn Marino if (lseek (fd, cur, SEEK_SET) < 0)
739*09d4459fSDaniel Fojt suppressible_error (errno);
740680a9cb8SJohn Marino if (hole_start < st->st_size)
741*09d4459fSDaniel Fojt return true;
742680a9cb8SJohn Marino }
743680a9cb8SJohn Marino }
744680a9cb8SJohn Marino
745*09d4459fSDaniel Fojt return false;
746680a9cb8SJohn Marino }
747680a9cb8SJohn Marino
748680a9cb8SJohn Marino /* Convert STR to a nonnegative integer, storing the result in *OUT.
749680a9cb8SJohn Marino STR must be a valid context length argument; report an error if it
750680a9cb8SJohn Marino isn't. Silently ceiling *OUT at the maximum value, as that is
751680a9cb8SJohn Marino practically equivalent to infinity for grep's purposes. */
752680a9cb8SJohn Marino static void
context_length_arg(char const * str,intmax_t * out)753680a9cb8SJohn Marino context_length_arg (char const *str, intmax_t *out)
754680a9cb8SJohn Marino {
755680a9cb8SJohn Marino switch (xstrtoimax (str, 0, 10, out, ""))
756680a9cb8SJohn Marino {
757680a9cb8SJohn Marino case LONGINT_OK:
758680a9cb8SJohn Marino case LONGINT_OVERFLOW:
759680a9cb8SJohn Marino if (0 <= *out)
760680a9cb8SJohn Marino break;
761*09d4459fSDaniel Fojt FALLTHROUGH;
762680a9cb8SJohn Marino default:
763*09d4459fSDaniel Fojt die (EXIT_TROUBLE, 0, "%s: %s", str,
764680a9cb8SJohn Marino _("invalid context length argument"));
765680a9cb8SJohn Marino }
766680a9cb8SJohn Marino }
767680a9cb8SJohn Marino
768*09d4459fSDaniel Fojt /* Return the add_exclude options suitable for excluding a file name.
769*09d4459fSDaniel Fojt If COMMAND_LINE, it is a command-line file name. */
770*09d4459fSDaniel Fojt static int
exclude_options(bool command_line)771*09d4459fSDaniel Fojt exclude_options (bool command_line)
772*09d4459fSDaniel Fojt {
773*09d4459fSDaniel Fojt return EXCLUDE_WILDCARDS | (command_line ? 0 : EXCLUDE_ANCHORED);
774*09d4459fSDaniel Fojt }
775*09d4459fSDaniel Fojt
776dc7c36e4SJohn Marino /* Return true if the file with NAME should be skipped.
777dc7c36e4SJohn Marino If COMMAND_LINE, it is a command-line argument.
778dc7c36e4SJohn Marino If IS_DIR, it is a directory. */
779dc7c36e4SJohn Marino static bool
skipped_file(char const * name,bool command_line,bool is_dir)780dc7c36e4SJohn Marino skipped_file (char const *name, bool command_line, bool is_dir)
781680a9cb8SJohn Marino {
782*09d4459fSDaniel Fojt struct exclude **pats;
783*09d4459fSDaniel Fojt if (! is_dir)
784*09d4459fSDaniel Fojt pats = excluded_patterns;
785*09d4459fSDaniel Fojt else if (directories == SKIP_DIRECTORIES)
786*09d4459fSDaniel Fojt return true;
787*09d4459fSDaniel Fojt else if (command_line && omit_dot_slash)
788*09d4459fSDaniel Fojt return false;
789*09d4459fSDaniel Fojt else
790*09d4459fSDaniel Fojt pats = excluded_directory_patterns;
791*09d4459fSDaniel Fojt return pats[command_line] && excluded_file_name (pats[command_line], name);
792680a9cb8SJohn Marino }
793680a9cb8SJohn Marino
/* Hairy buffering mechanism for grep.  The intent is to keep
   all reads aligned on a page boundary and multiples of the
   page size, unless a read yields a partial page.  */

static char *buffer;            /* Base of buffer. */
static size_t bufalloc;         /* Allocated buffer size, counting slop. */
static int bufdesc;             /* File descriptor. */
static char *bufbeg;            /* Beginning of user-visible stuff. */
static char *buflim;            /* Limit of user-visible stuff. */
static size_t pagesize;         /* alignment of memory pages */
static off_t bufoffset;         /* Read offset.  */
static off_t after_last_match;  /* Pointer after last matching line that
                                   would have been output if we were
                                   outputting characters. */
static bool skip_nuls;          /* Skip '\0' in data.  */
static bool skip_empty_lines;   /* Skip empty lines in data.  */
static uintmax_t totalnl;       /* Total newline count before lastnl. */

/* Initial buffer size, not counting slop. */
enum { INITIAL_BUFSIZE = 96 * 1024 };
814*09d4459fSDaniel Fojt
/* Return VAL aligned to the next multiple of ALIGNMENT.  VAL can be
   an integer or a pointer.  Both args must be free of side effects,
   since each is evaluated more than once.  A VAL that is already a
   multiple of ALIGNMENT is returned unchanged.  */
#define ALIGN_TO(val, alignment) \
  ((size_t) (val) % (alignment) == 0 \
   ? (val) \
   : (val) + ((alignment) - (size_t) (val) % (alignment)))
821680a9cb8SJohn Marino
822dc7c36e4SJohn Marino /* Add two numbers that count input bytes or lines, and report an
823dc7c36e4SJohn Marino error if the addition overflows. */
824dc7c36e4SJohn Marino static uintmax_t
add_count(uintmax_t a,uintmax_t b)825dc7c36e4SJohn Marino add_count (uintmax_t a, uintmax_t b)
826dc7c36e4SJohn Marino {
827dc7c36e4SJohn Marino uintmax_t sum = a + b;
828dc7c36e4SJohn Marino if (sum < a)
829*09d4459fSDaniel Fojt die (EXIT_TROUBLE, 0, _("input is too large to count"));
830dc7c36e4SJohn Marino return sum;
831dc7c36e4SJohn Marino }
832dc7c36e4SJohn Marino
/* Return true if BUF (of size SIZE) is all zeros.  An empty buffer
   (SIZE == 0) is vacuously all zeros.  */
static bool
all_zeros (char const *buf, size_t size)
{
  for (char const *p = buf; p < buf + size; p++)
    if (*p)
      return false;
  return true;
}
842dc7c36e4SJohn Marino
843dc7c36e4SJohn Marino /* Reset the buffer for a new file, returning false if we should skip it.
844680a9cb8SJohn Marino Initialize on the first time through. */
845dc7c36e4SJohn Marino static bool
reset(int fd,struct stat const * st)846680a9cb8SJohn Marino reset (int fd, struct stat const *st)
847680a9cb8SJohn Marino {
848680a9cb8SJohn Marino bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
849680a9cb8SJohn Marino bufbeg[-1] = eolbyte;
850680a9cb8SJohn Marino bufdesc = fd;
851*09d4459fSDaniel Fojt bufoffset = fd == STDIN_FILENO ? lseek (fd, 0, SEEK_CUR) : 0;
852*09d4459fSDaniel Fojt seek_failed = bufoffset < 0;
853680a9cb8SJohn Marino
854*09d4459fSDaniel Fojt /* Assume SEEK_DATA fails if SEEK_CUR does. */
855*09d4459fSDaniel Fojt seek_data_failed = seek_failed;
856*09d4459fSDaniel Fojt
857*09d4459fSDaniel Fojt if (seek_failed)
858680a9cb8SJohn Marino {
859*09d4459fSDaniel Fojt if (errno != ESPIPE)
860680a9cb8SJohn Marino {
861*09d4459fSDaniel Fojt suppressible_error (errno);
862dc7c36e4SJohn Marino return false;
863680a9cb8SJohn Marino }
864*09d4459fSDaniel Fojt bufoffset = 0;
865680a9cb8SJohn Marino }
866dc7c36e4SJohn Marino return true;
867680a9cb8SJohn Marino }
868680a9cb8SJohn Marino
869680a9cb8SJohn Marino /* Read new stuff into the buffer, saving the specified
870680a9cb8SJohn Marino amount of old stuff. When we're done, 'bufbeg' points
871680a9cb8SJohn Marino to the beginning of the buffer contents, and 'buflim'
872dc7c36e4SJohn Marino points just after the end. Return false if there's an error. */
873dc7c36e4SJohn Marino static bool
fillbuf(size_t save,struct stat const * st)874680a9cb8SJohn Marino fillbuf (size_t save, struct stat const *st)
875680a9cb8SJohn Marino {
876dc7c36e4SJohn Marino size_t fillsize;
877dc7c36e4SJohn Marino bool cc = true;
878680a9cb8SJohn Marino char *readbuf;
879680a9cb8SJohn Marino size_t readsize;
880680a9cb8SJohn Marino
881680a9cb8SJohn Marino /* Offset from start of buffer to start of old stuff
882680a9cb8SJohn Marino that we want to save. */
883680a9cb8SJohn Marino size_t saved_offset = buflim - save - buffer;
884680a9cb8SJohn Marino
885dc7c36e4SJohn Marino if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim)
886680a9cb8SJohn Marino {
887680a9cb8SJohn Marino readbuf = buflim;
888680a9cb8SJohn Marino bufbeg = buflim - save;
889680a9cb8SJohn Marino }
890680a9cb8SJohn Marino else
891680a9cb8SJohn Marino {
892680a9cb8SJohn Marino size_t minsize = save + pagesize;
893680a9cb8SJohn Marino size_t newsize;
894680a9cb8SJohn Marino size_t newalloc;
895680a9cb8SJohn Marino char *newbuf;
896680a9cb8SJohn Marino
897680a9cb8SJohn Marino /* Grow newsize until it is at least as great as minsize. */
898dc7c36e4SJohn Marino for (newsize = bufalloc - pagesize - sizeof (uword);
899dc7c36e4SJohn Marino newsize < minsize;
900dc7c36e4SJohn Marino newsize *= 2)
901dc7c36e4SJohn Marino if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize)
902680a9cb8SJohn Marino xalloc_die ();
903680a9cb8SJohn Marino
904680a9cb8SJohn Marino /* Try not to allocate more memory than the file size indicates,
905680a9cb8SJohn Marino as that might cause unnecessary memory exhaustion if the file
906680a9cb8SJohn Marino is large. However, do not use the original file size as a
907680a9cb8SJohn Marino heuristic if we've already read past the file end, as most
908680a9cb8SJohn Marino likely the file is growing. */
909680a9cb8SJohn Marino if (usable_st_size (st))
910680a9cb8SJohn Marino {
911680a9cb8SJohn Marino off_t to_be_read = st->st_size - bufoffset;
912680a9cb8SJohn Marino off_t maxsize_off = save + to_be_read;
913680a9cb8SJohn Marino if (0 <= to_be_read && to_be_read <= maxsize_off
914680a9cb8SJohn Marino && maxsize_off == (size_t) maxsize_off
915680a9cb8SJohn Marino && minsize <= (size_t) maxsize_off
916680a9cb8SJohn Marino && (size_t) maxsize_off < newsize)
917680a9cb8SJohn Marino newsize = maxsize_off;
918680a9cb8SJohn Marino }
919680a9cb8SJohn Marino
920680a9cb8SJohn Marino /* Add enough room so that the buffer is aligned and has room
921dc7c36e4SJohn Marino for byte sentinels fore and aft, and so that a uword can
922dc7c36e4SJohn Marino be read aft. */
923dc7c36e4SJohn Marino newalloc = newsize + pagesize + sizeof (uword);
924680a9cb8SJohn Marino
925680a9cb8SJohn Marino newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
926680a9cb8SJohn Marino readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
927680a9cb8SJohn Marino bufbeg = readbuf - save;
928680a9cb8SJohn Marino memmove (bufbeg, buffer + saved_offset, save);
929680a9cb8SJohn Marino bufbeg[-1] = eolbyte;
930680a9cb8SJohn Marino if (newbuf != buffer)
931680a9cb8SJohn Marino {
932680a9cb8SJohn Marino free (buffer);
933680a9cb8SJohn Marino buffer = newbuf;
934680a9cb8SJohn Marino }
935680a9cb8SJohn Marino }
936680a9cb8SJohn Marino
937dc7c36e4SJohn Marino clear_asan_poison ();
938dc7c36e4SJohn Marino
939dc7c36e4SJohn Marino readsize = buffer + bufalloc - sizeof (uword) - readbuf;
940680a9cb8SJohn Marino readsize -= readsize % pagesize;
941680a9cb8SJohn Marino
942dc7c36e4SJohn Marino while (true)
943dc7c36e4SJohn Marino {
944680a9cb8SJohn Marino fillsize = safe_read (bufdesc, readbuf, readsize);
945dc7c36e4SJohn Marino if (fillsize == SAFE_READ_ERROR)
946dc7c36e4SJohn Marino {
947dc7c36e4SJohn Marino fillsize = 0;
948dc7c36e4SJohn Marino cc = false;
949dc7c36e4SJohn Marino }
950680a9cb8SJohn Marino bufoffset += fillsize;
951dc7c36e4SJohn Marino
952*09d4459fSDaniel Fojt if (((fillsize == 0) | !skip_nuls) || !all_zeros (readbuf, fillsize))
953dc7c36e4SJohn Marino break;
954dc7c36e4SJohn Marino totalnl = add_count (totalnl, fillsize);
955dc7c36e4SJohn Marino
956dc7c36e4SJohn Marino if (SEEK_DATA != SEEK_SET && !seek_data_failed)
957dc7c36e4SJohn Marino {
958dc7c36e4SJohn Marino /* Solaris SEEK_DATA fails with errno == ENXIO in a hole at EOF. */
959dc7c36e4SJohn Marino off_t data_start = lseek (bufdesc, bufoffset, SEEK_DATA);
960dc7c36e4SJohn Marino if (data_start < 0 && errno == ENXIO
961dc7c36e4SJohn Marino && usable_st_size (st) && bufoffset < st->st_size)
962dc7c36e4SJohn Marino data_start = lseek (bufdesc, 0, SEEK_END);
963dc7c36e4SJohn Marino
964dc7c36e4SJohn Marino if (data_start < 0)
965dc7c36e4SJohn Marino seek_data_failed = true;
966dc7c36e4SJohn Marino else
967dc7c36e4SJohn Marino {
968dc7c36e4SJohn Marino totalnl = add_count (totalnl, data_start - bufoffset);
969dc7c36e4SJohn Marino bufoffset = data_start;
970dc7c36e4SJohn Marino }
971dc7c36e4SJohn Marino }
972dc7c36e4SJohn Marino }
973dc7c36e4SJohn Marino
974680a9cb8SJohn Marino buflim = readbuf + fillsize;
975dc7c36e4SJohn Marino
976dc7c36e4SJohn Marino /* Initialize the following word, because skip_easy_bytes and some
977dc7c36e4SJohn Marino matchers read (but do not use) those bytes. This avoids false
978dc7c36e4SJohn Marino positive reports of these bytes being used uninitialized. */
979dc7c36e4SJohn Marino memset (buflim, 0, sizeof (uword));
980dc7c36e4SJohn Marino
981dc7c36e4SJohn Marino /* Mark the part of the buffer not filled by the read or set by
982dc7c36e4SJohn Marino the above memset call as ASAN-poisoned. */
983dc7c36e4SJohn Marino asan_poison (buflim + sizeof (uword),
984dc7c36e4SJohn Marino bufalloc - (buflim - buffer) - sizeof (uword));
985dc7c36e4SJohn Marino
986680a9cb8SJohn Marino return cc;
987680a9cb8SJohn Marino }
988680a9cb8SJohn Marino
/* Flags controlling the style of output. */
static enum
{
  BINARY_BINARY_FILES,
  TEXT_BINARY_FILES,
  WITHOUT_MATCH_BINARY_FILES
} binary_files;         /* How to handle binary files.  */

/* Options for output as a list of matching/non-matching files */
static enum
{
  LISTFILES_NONE,
  LISTFILES_MATCHING,
  LISTFILES_NONMATCHING,
} list_files;

/* Whether to output filenames.  1 means yes, 0 means no, and -1 means
   'grep -r PATTERN FILE' was used and it is not known yet whether
   FILE is a directory (which means yes) or not (which means no).  */
static int out_file;

static int filename_mask;       /* If zero, output nulls after filenames.  */
static bool out_quiet;          /* Suppress all normal output. */
static bool out_invert;         /* Print nonmatching stuff. */
static bool out_line;           /* Print line numbers. */
static bool out_byte;           /* Print byte offsets. */
static intmax_t out_before;     /* Lines of leading context. */
static intmax_t out_after;      /* Lines of trailing context. */
static bool count_matches;      /* Count matching lines.  */
static intmax_t max_count;      /* Max number of selected
                                   lines from an input file.  */
static bool line_buffered;      /* Use line buffering.  */
static char *label = NULL;      /* Fake filename for stdin */


/* Internal variables to keep track of byte count, context, etc. */
static uintmax_t totalcc;       /* Total character count before bufbeg. */
static char const *lastnl;      /* Pointer after last newline counted. */
static char *lastout;           /* Pointer after last character output;
                                   NULL if no character has been output
                                   or if it's conceptually before bufbeg. */
static intmax_t outleft;        /* Maximum number of selected lines.  */
static intmax_t pending;        /* Pending lines of output.
                                   Always kept 0 if out_quiet is true.  */
static bool done_on_match;      /* Stop scanning file on first match.  */
static bool exit_on_match;      /* Exit on first match.  */
static bool dev_null_output;    /* Stdout is known to be /dev/null.  */
static bool binary;             /* Use binary rather than text I/O.  */
1037680a9cb8SJohn Marino
1038680a9cb8SJohn Marino static void
nlscan(char const * lim)1039680a9cb8SJohn Marino nlscan (char const *lim)
1040680a9cb8SJohn Marino {
1041680a9cb8SJohn Marino size_t newlines = 0;
1042680a9cb8SJohn Marino char const *beg;
1043680a9cb8SJohn Marino for (beg = lastnl; beg < lim; beg++)
1044680a9cb8SJohn Marino {
1045680a9cb8SJohn Marino beg = memchr (beg, eolbyte, lim - beg);
1046680a9cb8SJohn Marino if (!beg)
1047680a9cb8SJohn Marino break;
1048680a9cb8SJohn Marino newlines++;
1049680a9cb8SJohn Marino }
1050680a9cb8SJohn Marino totalnl = add_count (totalnl, newlines);
1051680a9cb8SJohn Marino lastnl = lim;
1052680a9cb8SJohn Marino }
1053680a9cb8SJohn Marino
1054680a9cb8SJohn Marino /* Print the current filename. */
1055680a9cb8SJohn Marino static void
print_filename(void)1056680a9cb8SJohn Marino print_filename (void)
1057680a9cb8SJohn Marino {
1058680a9cb8SJohn Marino pr_sgr_start_if (filename_color);
1059*09d4459fSDaniel Fojt fputs_errno (input_filename ());
1060680a9cb8SJohn Marino pr_sgr_end_if (filename_color);
1061680a9cb8SJohn Marino }
1062680a9cb8SJohn Marino
1063680a9cb8SJohn Marino /* Print a character separator. */
1064680a9cb8SJohn Marino static void
print_sep(char sep)1065680a9cb8SJohn Marino print_sep (char sep)
1066680a9cb8SJohn Marino {
1067680a9cb8SJohn Marino pr_sgr_start_if (sep_color);
1068*09d4459fSDaniel Fojt putchar_errno (sep);
1069680a9cb8SJohn Marino pr_sgr_end_if (sep_color);
1070680a9cb8SJohn Marino }
1071680a9cb8SJohn Marino
1072680a9cb8SJohn Marino /* Print a line number or a byte offset. */
1073680a9cb8SJohn Marino static void
print_offset(uintmax_t pos,const char * color)1074*09d4459fSDaniel Fojt print_offset (uintmax_t pos, const char *color)
1075680a9cb8SJohn Marino {
1076680a9cb8SJohn Marino pr_sgr_start_if (color);
1077*09d4459fSDaniel Fojt printf_errno ("%*"PRIuMAX, offset_width, pos);
1078680a9cb8SJohn Marino pr_sgr_end_if (color);
1079680a9cb8SJohn Marino }
1080680a9cb8SJohn Marino
1081*09d4459fSDaniel Fojt /* Print a whole line head (filename, line, byte). The output data
1082*09d4459fSDaniel Fojt starts at BEG and contains LEN bytes; it is followed by at least
1083*09d4459fSDaniel Fojt sizeof (uword) bytes, the first of which may be temporarily modified.
1084*09d4459fSDaniel Fojt The output data comes from what is perhaps a larger input line that
1085*09d4459fSDaniel Fojt goes until LIM, where LIM[-1] is an end-of-line byte. Use SEP as
1086*09d4459fSDaniel Fojt the separator on output.
1087*09d4459fSDaniel Fojt
1088*09d4459fSDaniel Fojt Return true unless the line was suppressed due to an encoding error. */
1089*09d4459fSDaniel Fojt
1090*09d4459fSDaniel Fojt static bool
print_line_head(char * beg,size_t len,char const * lim,char sep)1091*09d4459fSDaniel Fojt print_line_head (char *beg, size_t len, char const *lim, char sep)
1092680a9cb8SJohn Marino {
1093*09d4459fSDaniel Fojt if (binary_files != TEXT_BINARY_FILES)
1094*09d4459fSDaniel Fojt {
1095*09d4459fSDaniel Fojt char ch = beg[len];
1096*09d4459fSDaniel Fojt bool encoding_errors = buf_has_encoding_errors (beg, len);
1097*09d4459fSDaniel Fojt beg[len] = ch;
1098*09d4459fSDaniel Fojt if (encoding_errors)
1099*09d4459fSDaniel Fojt {
1100*09d4459fSDaniel Fojt encoding_error_output = true;
1101*09d4459fSDaniel Fojt return false;
1102*09d4459fSDaniel Fojt }
1103*09d4459fSDaniel Fojt }
1104680a9cb8SJohn Marino
1105680a9cb8SJohn Marino if (out_file)
1106680a9cb8SJohn Marino {
1107680a9cb8SJohn Marino print_filename ();
1108680a9cb8SJohn Marino if (filename_mask)
1109*09d4459fSDaniel Fojt print_sep (sep);
1110680a9cb8SJohn Marino else
1111*09d4459fSDaniel Fojt putchar_errno (0);
1112680a9cb8SJohn Marino }
1113680a9cb8SJohn Marino
1114680a9cb8SJohn Marino if (out_line)
1115680a9cb8SJohn Marino {
1116680a9cb8SJohn Marino if (lastnl < lim)
1117680a9cb8SJohn Marino {
1118680a9cb8SJohn Marino nlscan (beg);
1119680a9cb8SJohn Marino totalnl = add_count (totalnl, 1);
1120680a9cb8SJohn Marino lastnl = lim;
1121680a9cb8SJohn Marino }
1122*09d4459fSDaniel Fojt print_offset (totalnl, line_num_color);
1123680a9cb8SJohn Marino print_sep (sep);
1124680a9cb8SJohn Marino }
1125680a9cb8SJohn Marino
1126680a9cb8SJohn Marino if (out_byte)
1127680a9cb8SJohn Marino {
1128680a9cb8SJohn Marino uintmax_t pos = add_count (totalcc, beg - bufbeg);
1129*09d4459fSDaniel Fojt print_offset (pos, byte_num_color);
1130680a9cb8SJohn Marino print_sep (sep);
1131680a9cb8SJohn Marino }
1132*09d4459fSDaniel Fojt
1133*09d4459fSDaniel Fojt if (align_tabs && (out_file | out_line | out_byte) && len != 0)
1134*09d4459fSDaniel Fojt putchar_errno ('\t');
1135*09d4459fSDaniel Fojt
1136*09d4459fSDaniel Fojt return true;
1137680a9cb8SJohn Marino }
1138680a9cb8SJohn Marino
1139*09d4459fSDaniel Fojt static char *
print_line_middle(char * beg,char * lim,const char * line_color,const char * match_color)1140*09d4459fSDaniel Fojt print_line_middle (char *beg, char *lim,
1141680a9cb8SJohn Marino const char *line_color, const char *match_color)
1142680a9cb8SJohn Marino {
1143680a9cb8SJohn Marino size_t match_size;
1144680a9cb8SJohn Marino size_t match_offset;
1145*09d4459fSDaniel Fojt char *cur;
1146*09d4459fSDaniel Fojt char *mid = NULL;
1147*09d4459fSDaniel Fojt char *b;
1148680a9cb8SJohn Marino
1149*09d4459fSDaniel Fojt for (cur = beg;
1150*09d4459fSDaniel Fojt (cur < lim
1151*09d4459fSDaniel Fojt && ((match_offset = execute (compiled_pattern, beg, lim - beg,
1152*09d4459fSDaniel Fojt &match_size, cur)) != (size_t) -1));
1153*09d4459fSDaniel Fojt cur = b + match_size)
1154680a9cb8SJohn Marino {
1155*09d4459fSDaniel Fojt b = beg + match_offset;
1156680a9cb8SJohn Marino
1157680a9cb8SJohn Marino /* Avoid matching the empty line at the end of the buffer. */
1158680a9cb8SJohn Marino if (b == lim)
1159680a9cb8SJohn Marino break;
1160680a9cb8SJohn Marino
1161680a9cb8SJohn Marino /* Avoid hanging on grep --color "" foo */
1162680a9cb8SJohn Marino if (match_size == 0)
1163680a9cb8SJohn Marino {
1164680a9cb8SJohn Marino /* Make minimal progress; there may be further non-empty matches. */
1165680a9cb8SJohn Marino /* XXX - Could really advance by one whole multi-octet character. */
1166680a9cb8SJohn Marino match_size = 1;
1167680a9cb8SJohn Marino if (!mid)
1168680a9cb8SJohn Marino mid = cur;
1169680a9cb8SJohn Marino }
1170680a9cb8SJohn Marino else
1171680a9cb8SJohn Marino {
1172680a9cb8SJohn Marino /* This function is called on a matching line only,
1173680a9cb8SJohn Marino but is it selected or rejected/context? */
1174680a9cb8SJohn Marino if (only_matching)
1175*09d4459fSDaniel Fojt {
1176*09d4459fSDaniel Fojt char sep = out_invert ? SEP_CHAR_REJECTED : SEP_CHAR_SELECTED;
1177*09d4459fSDaniel Fojt if (! print_line_head (b, match_size, lim, sep))
1178*09d4459fSDaniel Fojt return NULL;
1179*09d4459fSDaniel Fojt }
1180680a9cb8SJohn Marino else
1181680a9cb8SJohn Marino {
1182680a9cb8SJohn Marino pr_sgr_start (line_color);
1183680a9cb8SJohn Marino if (mid)
1184680a9cb8SJohn Marino {
1185680a9cb8SJohn Marino cur = mid;
1186680a9cb8SJohn Marino mid = NULL;
1187680a9cb8SJohn Marino }
1188*09d4459fSDaniel Fojt fwrite_errno (cur, 1, b - cur);
1189680a9cb8SJohn Marino }
1190680a9cb8SJohn Marino
1191680a9cb8SJohn Marino pr_sgr_start_if (match_color);
1192*09d4459fSDaniel Fojt fwrite_errno (b, 1, match_size);
1193680a9cb8SJohn Marino pr_sgr_end_if (match_color);
1194680a9cb8SJohn Marino if (only_matching)
1195*09d4459fSDaniel Fojt putchar_errno (eolbyte);
1196680a9cb8SJohn Marino }
1197680a9cb8SJohn Marino }
1198680a9cb8SJohn Marino
1199680a9cb8SJohn Marino if (only_matching)
1200680a9cb8SJohn Marino cur = lim;
1201680a9cb8SJohn Marino else if (mid)
1202680a9cb8SJohn Marino cur = mid;
1203680a9cb8SJohn Marino
1204680a9cb8SJohn Marino return cur;
1205680a9cb8SJohn Marino }
1206680a9cb8SJohn Marino
1207*09d4459fSDaniel Fojt static char *
print_line_tail(char * beg,const char * lim,const char * line_color)1208*09d4459fSDaniel Fojt print_line_tail (char *beg, const char *lim, const char *line_color)
1209680a9cb8SJohn Marino {
1210680a9cb8SJohn Marino size_t eol_size;
1211680a9cb8SJohn Marino size_t tail_size;
1212680a9cb8SJohn Marino
1213680a9cb8SJohn Marino eol_size = (lim > beg && lim[-1] == eolbyte);
1214680a9cb8SJohn Marino eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
1215680a9cb8SJohn Marino tail_size = lim - eol_size - beg;
1216680a9cb8SJohn Marino
1217680a9cb8SJohn Marino if (tail_size > 0)
1218680a9cb8SJohn Marino {
1219680a9cb8SJohn Marino pr_sgr_start (line_color);
1220*09d4459fSDaniel Fojt fwrite_errno (beg, 1, tail_size);
1221680a9cb8SJohn Marino beg += tail_size;
1222680a9cb8SJohn Marino pr_sgr_end (line_color);
1223680a9cb8SJohn Marino }
1224680a9cb8SJohn Marino
1225680a9cb8SJohn Marino return beg;
1226680a9cb8SJohn Marino }
1227680a9cb8SJohn Marino
1228680a9cb8SJohn Marino static void
prline(char * beg,char * lim,char sep)1229*09d4459fSDaniel Fojt prline (char *beg, char *lim, char sep)
1230680a9cb8SJohn Marino {
1231dc7c36e4SJohn Marino bool matching;
1232680a9cb8SJohn Marino const char *line_color;
1233680a9cb8SJohn Marino const char *match_color;
1234680a9cb8SJohn Marino
1235680a9cb8SJohn Marino if (!only_matching)
1236*09d4459fSDaniel Fojt if (! print_line_head (beg, lim - beg - 1, lim, sep))
1237*09d4459fSDaniel Fojt return;
1238680a9cb8SJohn Marino
1239680a9cb8SJohn Marino matching = (sep == SEP_CHAR_SELECTED) ^ out_invert;
1240680a9cb8SJohn Marino
1241680a9cb8SJohn Marino if (color_option)
1242680a9cb8SJohn Marino {
1243680a9cb8SJohn Marino line_color = (((sep == SEP_CHAR_SELECTED)
1244680a9cb8SJohn Marino ^ (out_invert && (color_option < 0)))
1245680a9cb8SJohn Marino ? selected_line_color : context_line_color);
1246680a9cb8SJohn Marino match_color = (sep == SEP_CHAR_SELECTED
1247680a9cb8SJohn Marino ? selected_match_color : context_match_color);
1248680a9cb8SJohn Marino }
1249680a9cb8SJohn Marino else
1250680a9cb8SJohn Marino line_color = match_color = NULL; /* Shouldn't be used. */
1251680a9cb8SJohn Marino
1252680a9cb8SJohn Marino if ((only_matching && matching)
1253680a9cb8SJohn Marino || (color_option && (*line_color || *match_color)))
1254680a9cb8SJohn Marino {
1255680a9cb8SJohn Marino /* We already know that non-matching lines have no match (to colorize). */
1256680a9cb8SJohn Marino if (matching && (only_matching || *match_color))
1257*09d4459fSDaniel Fojt {
1258680a9cb8SJohn Marino beg = print_line_middle (beg, lim, line_color, match_color);
1259*09d4459fSDaniel Fojt if (! beg)
1260*09d4459fSDaniel Fojt return;
1261*09d4459fSDaniel Fojt }
1262680a9cb8SJohn Marino
1263680a9cb8SJohn Marino if (!only_matching && *line_color)
1264680a9cb8SJohn Marino {
1265680a9cb8SJohn Marino /* This code is exercised at least when grep is invoked like this:
1266680a9cb8SJohn Marino echo k| GREP_COLORS='sl=01;32' src/grep k --color=always */
1267680a9cb8SJohn Marino beg = print_line_tail (beg, lim, line_color);
1268680a9cb8SJohn Marino }
1269680a9cb8SJohn Marino }
1270680a9cb8SJohn Marino
1271680a9cb8SJohn Marino if (!only_matching && lim > beg)
1272*09d4459fSDaniel Fojt fwrite_errno (beg, 1, lim - beg);
1273680a9cb8SJohn Marino
1274680a9cb8SJohn Marino if (line_buffered)
1275*09d4459fSDaniel Fojt fflush_errno ();
1276*09d4459fSDaniel Fojt
1277*09d4459fSDaniel Fojt if (stdout_errno)
1278*09d4459fSDaniel Fojt die (EXIT_TROUBLE, stdout_errno, _("write error"));
1279*09d4459fSDaniel Fojt
1280*09d4459fSDaniel Fojt lastout = lim;
1281680a9cb8SJohn Marino }
1282680a9cb8SJohn Marino
1283*09d4459fSDaniel Fojt /* Print pending lines of trailing context prior to LIM. */
1284680a9cb8SJohn Marino static void
prpending(char const * lim)1285680a9cb8SJohn Marino prpending (char const *lim)
1286680a9cb8SJohn Marino {
1287680a9cb8SJohn Marino if (!lastout)
1288680a9cb8SJohn Marino lastout = bufbeg;
1289*09d4459fSDaniel Fojt for (; 0 < pending && lastout < lim; pending--)
1290680a9cb8SJohn Marino {
1291*09d4459fSDaniel Fojt char *nl = memchr (lastout, eolbyte, lim - lastout);
1292680a9cb8SJohn Marino prline (lastout, nl + 1, SEP_CHAR_REJECTED);
1293680a9cb8SJohn Marino }
1294680a9cb8SJohn Marino }
1295680a9cb8SJohn Marino
1296680a9cb8SJohn Marino /* Output the lines between BEG and LIM. Deal with context. */
1297680a9cb8SJohn Marino static void
prtext(char * beg,char * lim)1298*09d4459fSDaniel Fojt prtext (char *beg, char *lim)
1299680a9cb8SJohn Marino {
1300680a9cb8SJohn Marino static bool used; /* Avoid printing SEP_STR_GROUP before any output. */
1301680a9cb8SJohn Marino char eol = eolbyte;
1302680a9cb8SJohn Marino
1303680a9cb8SJohn Marino if (!out_quiet && pending > 0)
1304680a9cb8SJohn Marino prpending (beg);
1305680a9cb8SJohn Marino
1306*09d4459fSDaniel Fojt char *p = beg;
1307680a9cb8SJohn Marino
1308680a9cb8SJohn Marino if (!out_quiet)
1309680a9cb8SJohn Marino {
1310680a9cb8SJohn Marino /* Deal with leading context. */
1311680a9cb8SJohn Marino char const *bp = lastout ? lastout : bufbeg;
1312680a9cb8SJohn Marino intmax_t i;
1313680a9cb8SJohn Marino for (i = 0; i < out_before; ++i)
1314680a9cb8SJohn Marino if (p > bp)
1315680a9cb8SJohn Marino do
1316680a9cb8SJohn Marino --p;
1317680a9cb8SJohn Marino while (p[-1] != eol);
1318680a9cb8SJohn Marino
1319680a9cb8SJohn Marino /* Print the group separator unless the output is adjacent to
1320680a9cb8SJohn Marino the previous output in the file. */
1321680a9cb8SJohn Marino if ((0 <= out_before || 0 <= out_after) && used
1322680a9cb8SJohn Marino && p != lastout && group_separator)
1323680a9cb8SJohn Marino {
1324680a9cb8SJohn Marino pr_sgr_start_if (sep_color);
1325*09d4459fSDaniel Fojt fputs_errno (group_separator);
1326680a9cb8SJohn Marino pr_sgr_end_if (sep_color);
1327*09d4459fSDaniel Fojt putchar_errno ('\n');
1328680a9cb8SJohn Marino }
1329680a9cb8SJohn Marino
1330680a9cb8SJohn Marino while (p < beg)
1331680a9cb8SJohn Marino {
1332*09d4459fSDaniel Fojt char *nl = memchr (p, eol, beg - p);
1333680a9cb8SJohn Marino nl++;
1334680a9cb8SJohn Marino prline (p, nl, SEP_CHAR_REJECTED);
1335680a9cb8SJohn Marino p = nl;
1336680a9cb8SJohn Marino }
1337680a9cb8SJohn Marino }
1338680a9cb8SJohn Marino
1339680a9cb8SJohn Marino intmax_t n;
1340680a9cb8SJohn Marino if (out_invert)
1341680a9cb8SJohn Marino {
1342680a9cb8SJohn Marino /* One or more lines are output. */
1343680a9cb8SJohn Marino for (n = 0; p < lim && n < outleft; n++)
1344680a9cb8SJohn Marino {
1345*09d4459fSDaniel Fojt char *nl = memchr (p, eol, lim - p);
1346680a9cb8SJohn Marino nl++;
1347680a9cb8SJohn Marino if (!out_quiet)
1348680a9cb8SJohn Marino prline (p, nl, SEP_CHAR_SELECTED);
1349680a9cb8SJohn Marino p = nl;
1350680a9cb8SJohn Marino }
1351680a9cb8SJohn Marino }
1352680a9cb8SJohn Marino else
1353680a9cb8SJohn Marino {
1354680a9cb8SJohn Marino /* Just one line is output. */
1355680a9cb8SJohn Marino if (!out_quiet)
1356680a9cb8SJohn Marino prline (beg, lim, SEP_CHAR_SELECTED);
1357680a9cb8SJohn Marino n = 1;
1358680a9cb8SJohn Marino p = lim;
1359680a9cb8SJohn Marino }
1360680a9cb8SJohn Marino
1361680a9cb8SJohn Marino after_last_match = bufoffset - (buflim - p);
1362680a9cb8SJohn Marino pending = out_quiet ? 0 : MAX (0, out_after);
1363680a9cb8SJohn Marino used = true;
1364680a9cb8SJohn Marino outleft -= n;
1365680a9cb8SJohn Marino }
1366680a9cb8SJohn Marino
/* Overwrite every NUL byte in the buffer that starts at P and ends at
   LIM with EOL.  This avoids running out of memory when binary input
   contains a long run of zeros, which would otherwise be treated as
   part of one very long line.  *LIM must be writable; it is used as a
   temporary sentinel and holds EOL on return.  Do nothing at all when
   EOL is itself NUL.  */
static void
zap_nuls (char *p, char *lim, char eol)
{
  if (!eol)
    return;

  for (;;)
    {
      /* Plant a NUL at LIM so that strlen cannot run past the buffer,
         then restore the end-of-line byte straight away.  */
      *lim = '\0';
      size_t clean = strlen (p);
      *lim = eol;

      p += clean;
      if (p == lim)
        return;

      /* P is at a NUL inside the buffer.  Replace it and every NUL
         directly after it; *LIM is nonzero again, so this stops at LIM
         at the latest.  */
      do
        *p++ = eol;
      while (*p == '\0');
    }
}
1387680a9cb8SJohn Marino
1388680a9cb8SJohn Marino /* Scan the specified portion of the buffer, matching lines (or
1389680a9cb8SJohn Marino between matching lines if OUT_INVERT is true). Return a count of
1390dc7c36e4SJohn Marino lines printed. Replace all NUL bytes with NUL_ZAPPER as we go. */
1391680a9cb8SJohn Marino static intmax_t
grepbuf(char * beg,char const * lim)1392*09d4459fSDaniel Fojt grepbuf (char *beg, char const *lim)
1393680a9cb8SJohn Marino {
1394680a9cb8SJohn Marino intmax_t outleft0 = outleft;
1395*09d4459fSDaniel Fojt char *endp;
1396680a9cb8SJohn Marino
1397*09d4459fSDaniel Fojt for (char *p = beg; p < lim; p = endp)
1398680a9cb8SJohn Marino {
1399680a9cb8SJohn Marino size_t match_size;
1400*09d4459fSDaniel Fojt size_t match_offset = execute (compiled_pattern, p, lim - p,
1401*09d4459fSDaniel Fojt &match_size, NULL);
1402680a9cb8SJohn Marino if (match_offset == (size_t) -1)
1403680a9cb8SJohn Marino {
1404680a9cb8SJohn Marino if (!out_invert)
1405680a9cb8SJohn Marino break;
1406680a9cb8SJohn Marino match_offset = lim - p;
1407680a9cb8SJohn Marino match_size = 0;
1408680a9cb8SJohn Marino }
1409*09d4459fSDaniel Fojt char *b = p + match_offset;
1410680a9cb8SJohn Marino endp = b + match_size;
1411680a9cb8SJohn Marino /* Avoid matching the empty line at the end of the buffer. */
1412680a9cb8SJohn Marino if (!out_invert && b == lim)
1413680a9cb8SJohn Marino break;
1414680a9cb8SJohn Marino if (!out_invert || p < b)
1415680a9cb8SJohn Marino {
1416*09d4459fSDaniel Fojt char *prbeg = out_invert ? p : b;
1417*09d4459fSDaniel Fojt char *prend = out_invert ? b : endp;
1418680a9cb8SJohn Marino prtext (prbeg, prend);
1419680a9cb8SJohn Marino if (!outleft || done_on_match)
1420680a9cb8SJohn Marino {
1421680a9cb8SJohn Marino if (exit_on_match)
1422*09d4459fSDaniel Fojt exit (errseen ? exit_failure : EXIT_SUCCESS);
1423680a9cb8SJohn Marino break;
1424680a9cb8SJohn Marino }
1425680a9cb8SJohn Marino }
1426680a9cb8SJohn Marino }
1427680a9cb8SJohn Marino
1428680a9cb8SJohn Marino return outleft0 - outleft;
1429680a9cb8SJohn Marino }
1430680a9cb8SJohn Marino
1431*09d4459fSDaniel Fojt /* Search a given (non-directory) file. Return a count of lines printed.
1432*09d4459fSDaniel Fojt Set *INEOF to true if end-of-file reached. */
1433680a9cb8SJohn Marino static intmax_t
grep(int fd,struct stat const * st,bool * ineof)1434*09d4459fSDaniel Fojt grep (int fd, struct stat const *st, bool *ineof)
1435680a9cb8SJohn Marino {
1436680a9cb8SJohn Marino intmax_t nlines, i;
1437680a9cb8SJohn Marino size_t residue, save;
1438680a9cb8SJohn Marino char oldc;
1439680a9cb8SJohn Marino char *beg;
1440680a9cb8SJohn Marino char *lim;
1441680a9cb8SJohn Marino char eol = eolbyte;
1442dc7c36e4SJohn Marino char nul_zapper = '\0';
1443dc7c36e4SJohn Marino bool done_on_match_0 = done_on_match;
1444dc7c36e4SJohn Marino bool out_quiet_0 = out_quiet;
1445680a9cb8SJohn Marino
1446*09d4459fSDaniel Fojt /* The value of NLINES when nulls were first deduced in the input;
1447*09d4459fSDaniel Fojt this is not necessarily the same as the number of matching lines
1448*09d4459fSDaniel Fojt before the first null. -1 if no input nulls have been deduced. */
1449*09d4459fSDaniel Fojt intmax_t nlines_first_null = -1;
1450*09d4459fSDaniel Fojt
1451680a9cb8SJohn Marino if (! reset (fd, st))
1452680a9cb8SJohn Marino return 0;
1453680a9cb8SJohn Marino
1454680a9cb8SJohn Marino totalcc = 0;
1455680a9cb8SJohn Marino lastout = 0;
1456680a9cb8SJohn Marino totalnl = 0;
1457680a9cb8SJohn Marino outleft = max_count;
1458680a9cb8SJohn Marino after_last_match = 0;
1459680a9cb8SJohn Marino pending = 0;
1460dc7c36e4SJohn Marino skip_nuls = skip_empty_lines && !eol;
1461*09d4459fSDaniel Fojt encoding_error_output = false;
1462680a9cb8SJohn Marino
1463680a9cb8SJohn Marino nlines = 0;
1464680a9cb8SJohn Marino residue = 0;
1465680a9cb8SJohn Marino save = 0;
1466680a9cb8SJohn Marino
1467680a9cb8SJohn Marino if (! fillbuf (save, st))
1468680a9cb8SJohn Marino {
1469*09d4459fSDaniel Fojt suppressible_error (errno);
1470680a9cb8SJohn Marino return 0;
1471680a9cb8SJohn Marino }
1472680a9cb8SJohn Marino
1473*09d4459fSDaniel Fojt offset_width = 0;
1474*09d4459fSDaniel Fojt if (align_tabs)
1475dc7c36e4SJohn Marino {
1476*09d4459fSDaniel Fojt /* Width is log of maximum number. Line numbers are origin-1. */
1477*09d4459fSDaniel Fojt uintmax_t num = usable_st_size (st) ? st->st_size : UINTMAX_MAX;
1478*09d4459fSDaniel Fojt num += out_line && num < UINTMAX_MAX;
1479*09d4459fSDaniel Fojt do
1480*09d4459fSDaniel Fojt offset_width++;
1481*09d4459fSDaniel Fojt while ((num /= 10) != 0);
1482*09d4459fSDaniel Fojt }
1483*09d4459fSDaniel Fojt
1484*09d4459fSDaniel Fojt for (bool firsttime = true; ; firsttime = false)
1485*09d4459fSDaniel Fojt {
1486*09d4459fSDaniel Fojt if (nlines_first_null < 0 && eol && binary_files != TEXT_BINARY_FILES
1487*09d4459fSDaniel Fojt && (buf_has_nulls (bufbeg, buflim - bufbeg)
1488*09d4459fSDaniel Fojt || (firsttime && file_must_have_nulls (buflim - bufbeg, fd, st))))
1489dc7c36e4SJohn Marino {
1490dc7c36e4SJohn Marino if (binary_files == WITHOUT_MATCH_BINARY_FILES)
1491680a9cb8SJohn Marino return 0;
1492*09d4459fSDaniel Fojt if (!count_matches)
1493dc7c36e4SJohn Marino done_on_match = out_quiet = true;
1494*09d4459fSDaniel Fojt nlines_first_null = nlines;
1495dc7c36e4SJohn Marino nul_zapper = eol;
1496dc7c36e4SJohn Marino skip_nuls = skip_empty_lines;
1497dc7c36e4SJohn Marino }
1498680a9cb8SJohn Marino
1499680a9cb8SJohn Marino lastnl = bufbeg;
1500680a9cb8SJohn Marino if (lastout)
1501680a9cb8SJohn Marino lastout = bufbeg;
1502680a9cb8SJohn Marino
1503680a9cb8SJohn Marino beg = bufbeg + save;
1504680a9cb8SJohn Marino
1505680a9cb8SJohn Marino /* no more data to scan (eof) except for maybe a residue -> break */
1506680a9cb8SJohn Marino if (beg == buflim)
1507*09d4459fSDaniel Fojt {
1508*09d4459fSDaniel Fojt *ineof = true;
1509680a9cb8SJohn Marino break;
1510*09d4459fSDaniel Fojt }
1511680a9cb8SJohn Marino
1512dc7c36e4SJohn Marino zap_nuls (beg, buflim, nul_zapper);
1513dc7c36e4SJohn Marino
1514680a9cb8SJohn Marino /* Determine new residue (the length of an incomplete line at the end of
1515680a9cb8SJohn Marino the buffer, 0 means there is no incomplete last line). */
1516680a9cb8SJohn Marino oldc = beg[-1];
1517680a9cb8SJohn Marino beg[-1] = eol;
1518680a9cb8SJohn Marino /* FIXME: use rawmemrchr if/when it exists, since we have ensured
1519680a9cb8SJohn Marino that this use of memrchr is guaranteed never to return NULL. */
1520680a9cb8SJohn Marino lim = memrchr (beg - 1, eol, buflim - beg + 1);
1521680a9cb8SJohn Marino ++lim;
1522680a9cb8SJohn Marino beg[-1] = oldc;
1523680a9cb8SJohn Marino if (lim == beg)
1524680a9cb8SJohn Marino lim = beg - residue;
1525680a9cb8SJohn Marino beg -= residue;
1526680a9cb8SJohn Marino residue = buflim - lim;
1527680a9cb8SJohn Marino
1528680a9cb8SJohn Marino if (beg < lim)
1529680a9cb8SJohn Marino {
1530680a9cb8SJohn Marino if (outleft)
1531680a9cb8SJohn Marino nlines += grepbuf (beg, lim);
1532680a9cb8SJohn Marino if (pending)
1533680a9cb8SJohn Marino prpending (lim);
1534*09d4459fSDaniel Fojt if ((!outleft && !pending)
1535*09d4459fSDaniel Fojt || (done_on_match && MAX (0, nlines_first_null) < nlines))
1536680a9cb8SJohn Marino goto finish_grep;
1537680a9cb8SJohn Marino }
1538680a9cb8SJohn Marino
1539680a9cb8SJohn Marino /* The last OUT_BEFORE lines at the end of the buffer will be needed as
1540680a9cb8SJohn Marino leading context if there is a matching line at the begin of the
1541680a9cb8SJohn Marino next data. Make beg point to their begin. */
1542680a9cb8SJohn Marino i = 0;
1543680a9cb8SJohn Marino beg = lim;
1544680a9cb8SJohn Marino while (i < out_before && beg > bufbeg && beg != lastout)
1545680a9cb8SJohn Marino {
1546680a9cb8SJohn Marino ++i;
1547680a9cb8SJohn Marino do
1548680a9cb8SJohn Marino --beg;
1549680a9cb8SJohn Marino while (beg[-1] != eol);
1550680a9cb8SJohn Marino }
1551680a9cb8SJohn Marino
1552680a9cb8SJohn Marino /* Detect whether leading context is adjacent to previous output. */
1553680a9cb8SJohn Marino if (beg != lastout)
1554680a9cb8SJohn Marino lastout = 0;
1555680a9cb8SJohn Marino
1556680a9cb8SJohn Marino /* Handle some details and read more data to scan. */
1557680a9cb8SJohn Marino save = residue + lim - beg;
1558680a9cb8SJohn Marino if (out_byte)
1559680a9cb8SJohn Marino totalcc = add_count (totalcc, buflim - bufbeg - save);
1560680a9cb8SJohn Marino if (out_line)
1561680a9cb8SJohn Marino nlscan (beg);
1562680a9cb8SJohn Marino if (! fillbuf (save, st))
1563680a9cb8SJohn Marino {
1564*09d4459fSDaniel Fojt suppressible_error (errno);
1565680a9cb8SJohn Marino goto finish_grep;
1566680a9cb8SJohn Marino }
1567680a9cb8SJohn Marino }
1568680a9cb8SJohn Marino if (residue)
1569680a9cb8SJohn Marino {
1570680a9cb8SJohn Marino *buflim++ = eol;
1571680a9cb8SJohn Marino if (outleft)
1572680a9cb8SJohn Marino nlines += grepbuf (bufbeg + save - residue, buflim);
1573680a9cb8SJohn Marino if (pending)
1574680a9cb8SJohn Marino prpending (buflim);
1575680a9cb8SJohn Marino }
1576680a9cb8SJohn Marino
1577680a9cb8SJohn Marino finish_grep:
1578dc7c36e4SJohn Marino done_on_match = done_on_match_0;
1579dc7c36e4SJohn Marino out_quiet = out_quiet_0;
1580*09d4459fSDaniel Fojt if (!out_quiet && (encoding_error_output
1581*09d4459fSDaniel Fojt || (0 <= nlines_first_null && nlines_first_null < nlines)))
1582*09d4459fSDaniel Fojt {
1583*09d4459fSDaniel Fojt printf_errno (_("Binary file %s matches\n"), input_filename ());
1584*09d4459fSDaniel Fojt if (line_buffered)
1585*09d4459fSDaniel Fojt fflush_errno ();
1586*09d4459fSDaniel Fojt }
1587680a9cb8SJohn Marino return nlines;
1588680a9cb8SJohn Marino }
1589680a9cb8SJohn Marino
1590dc7c36e4SJohn Marino static bool
grepdirent(FTS * fts,FTSENT * ent,bool command_line)1591dc7c36e4SJohn Marino grepdirent (FTS *fts, FTSENT *ent, bool command_line)
1592680a9cb8SJohn Marino {
1593dc7c36e4SJohn Marino bool follow;
1594680a9cb8SJohn Marino command_line &= ent->fts_level == FTS_ROOTLEVEL;
1595680a9cb8SJohn Marino
1596680a9cb8SJohn Marino if (ent->fts_info == FTS_DP)
1597dc7c36e4SJohn Marino return true;
1598680a9cb8SJohn Marino
1599dc7c36e4SJohn Marino if (!command_line
1600dc7c36e4SJohn Marino && skipped_file (ent->fts_name, false,
1601680a9cb8SJohn Marino (ent->fts_info == FTS_D || ent->fts_info == FTS_DC
1602680a9cb8SJohn Marino || ent->fts_info == FTS_DNR)))
1603680a9cb8SJohn Marino {
1604680a9cb8SJohn Marino fts_set (fts, ent, FTS_SKIP);
1605dc7c36e4SJohn Marino return true;
1606680a9cb8SJohn Marino }
1607680a9cb8SJohn Marino
1608dc7c36e4SJohn Marino filename = ent->fts_path;
1609dc7c36e4SJohn Marino if (omit_dot_slash && filename[1])
1610dc7c36e4SJohn Marino filename += 2;
1611680a9cb8SJohn Marino follow = (fts->fts_options & FTS_LOGICAL
1612680a9cb8SJohn Marino || (fts->fts_options & FTS_COMFOLLOW && command_line));
1613680a9cb8SJohn Marino
1614680a9cb8SJohn Marino switch (ent->fts_info)
1615680a9cb8SJohn Marino {
1616680a9cb8SJohn Marino case FTS_D:
1617680a9cb8SJohn Marino if (directories == RECURSE_DIRECTORIES)
1618dc7c36e4SJohn Marino return true;
1619680a9cb8SJohn Marino fts_set (fts, ent, FTS_SKIP);
1620680a9cb8SJohn Marino break;
1621680a9cb8SJohn Marino
1622680a9cb8SJohn Marino case FTS_DC:
1623680a9cb8SJohn Marino if (!suppress_errors)
1624680a9cb8SJohn Marino error (0, 0, _("warning: %s: %s"), filename,
1625680a9cb8SJohn Marino _("recursive directory loop"));
1626dc7c36e4SJohn Marino return true;
1627680a9cb8SJohn Marino
1628680a9cb8SJohn Marino case FTS_DNR:
1629680a9cb8SJohn Marino case FTS_ERR:
1630680a9cb8SJohn Marino case FTS_NS:
1631*09d4459fSDaniel Fojt suppressible_error (ent->fts_errno);
1632dc7c36e4SJohn Marino return true;
1633680a9cb8SJohn Marino
1634680a9cb8SJohn Marino case FTS_DEFAULT:
1635680a9cb8SJohn Marino case FTS_NSOK:
1636dc7c36e4SJohn Marino if (skip_devices (command_line))
1637680a9cb8SJohn Marino {
1638dc7c36e4SJohn Marino struct stat *st = ent->fts_statp;
1639680a9cb8SJohn Marino struct stat st1;
1640680a9cb8SJohn Marino if (! st->st_mode)
1641680a9cb8SJohn Marino {
1642680a9cb8SJohn Marino /* The file type is not already known. Get the file status
1643680a9cb8SJohn Marino before opening, since opening might have side effects
1644680a9cb8SJohn Marino on a device. */
1645680a9cb8SJohn Marino int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
1646680a9cb8SJohn Marino if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
1647680a9cb8SJohn Marino {
1648*09d4459fSDaniel Fojt suppressible_error (errno);
1649dc7c36e4SJohn Marino return true;
1650680a9cb8SJohn Marino }
1651680a9cb8SJohn Marino st = &st1;
1652680a9cb8SJohn Marino }
1653680a9cb8SJohn Marino if (is_device_mode (st->st_mode))
1654dc7c36e4SJohn Marino return true;
1655680a9cb8SJohn Marino }
1656680a9cb8SJohn Marino break;
1657680a9cb8SJohn Marino
1658680a9cb8SJohn Marino case FTS_F:
1659680a9cb8SJohn Marino case FTS_SLNONE:
1660680a9cb8SJohn Marino break;
1661680a9cb8SJohn Marino
1662680a9cb8SJohn Marino case FTS_SL:
1663680a9cb8SJohn Marino case FTS_W:
1664dc7c36e4SJohn Marino return true;
1665680a9cb8SJohn Marino
1666680a9cb8SJohn Marino default:
1667680a9cb8SJohn Marino abort ();
1668680a9cb8SJohn Marino }
1669680a9cb8SJohn Marino
1670*09d4459fSDaniel Fojt return grepfile (fts->fts_cwd_fd, ent->fts_accpath, follow, command_line);
1671680a9cb8SJohn Marino }
1672680a9cb8SJohn Marino
/* Return true if ERR is an errno value that 'open ("symlink", ...
   O_NOFOLLOW ...)' may fail with.  POSIX specifies ELOOP, but FreeBSD
   uses EMLINK and NetBSD uses EFTYPE.  */
static bool
open_symlink_nofollow_error (int err)
{
  bool symlink_refused = err == ELOOP || err == EMLINK;
#ifdef EFTYPE
  symlink_refused = symlink_refused || err == EFTYPE;
#endif
  return symlink_refused;
}
1686dc7c36e4SJohn Marino
1687dc7c36e4SJohn Marino static bool
grepfile(int dirdesc,char const * name,bool follow,bool command_line)1688dc7c36e4SJohn Marino grepfile (int dirdesc, char const *name, bool follow, bool command_line)
1689dc7c36e4SJohn Marino {
1690dc7c36e4SJohn Marino int oflag = (O_RDONLY | O_NOCTTY
1691*09d4459fSDaniel Fojt | (IGNORE_DUPLICATE_BRANCH_WARNING
1692*09d4459fSDaniel Fojt (binary ? O_BINARY : 0))
1693dc7c36e4SJohn Marino | (follow ? 0 : O_NOFOLLOW)
1694dc7c36e4SJohn Marino | (skip_devices (command_line) ? O_NONBLOCK : 0));
1695dc7c36e4SJohn Marino int desc = openat_safer (dirdesc, name, oflag);
1696680a9cb8SJohn Marino if (desc < 0)
1697680a9cb8SJohn Marino {
1698dc7c36e4SJohn Marino if (follow || ! open_symlink_nofollow_error (errno))
1699*09d4459fSDaniel Fojt suppressible_error (errno);
1700dc7c36e4SJohn Marino return true;
1701680a9cb8SJohn Marino }
1702680a9cb8SJohn Marino return grepdesc (desc, command_line);
1703680a9cb8SJohn Marino }
1704680a9cb8SJohn Marino
1705*09d4459fSDaniel Fojt /* Read all data from FD, with status ST. Return true if successful,
1706*09d4459fSDaniel Fojt false (setting errno) otherwise. */
1707*09d4459fSDaniel Fojt static bool
drain_input(int fd,struct stat const * st)1708*09d4459fSDaniel Fojt drain_input (int fd, struct stat const *st)
1709*09d4459fSDaniel Fojt {
1710*09d4459fSDaniel Fojt ssize_t nbytes;
1711*09d4459fSDaniel Fojt if (S_ISFIFO (st->st_mode) && dev_null_output)
1712*09d4459fSDaniel Fojt {
1713*09d4459fSDaniel Fojt #ifdef SPLICE_F_MOVE
1714*09d4459fSDaniel Fojt /* Should be faster, since it need not copy data to user space. */
1715*09d4459fSDaniel Fojt nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
1716*09d4459fSDaniel Fojt INITIAL_BUFSIZE, SPLICE_F_MOVE);
1717*09d4459fSDaniel Fojt if (0 <= nbytes || errno != EINVAL)
1718*09d4459fSDaniel Fojt {
1719*09d4459fSDaniel Fojt while (0 < nbytes)
1720*09d4459fSDaniel Fojt nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
1721*09d4459fSDaniel Fojt INITIAL_BUFSIZE, SPLICE_F_MOVE);
1722*09d4459fSDaniel Fojt return nbytes == 0;
1723*09d4459fSDaniel Fojt }
1724*09d4459fSDaniel Fojt #endif
1725*09d4459fSDaniel Fojt }
1726*09d4459fSDaniel Fojt while ((nbytes = safe_read (fd, buffer, bufalloc)))
1727*09d4459fSDaniel Fojt if (nbytes == SAFE_READ_ERROR)
1728*09d4459fSDaniel Fojt return false;
1729*09d4459fSDaniel Fojt return true;
1730*09d4459fSDaniel Fojt }
1731*09d4459fSDaniel Fojt
1732*09d4459fSDaniel Fojt /* Finish reading from FD, with status ST and where end-of-file has
1733*09d4459fSDaniel Fojt been seen if INEOF. Typically this is a no-op, but when reading
1734*09d4459fSDaniel Fojt from standard input this may adjust the file offset or drain a
1735*09d4459fSDaniel Fojt pipe. */
1736*09d4459fSDaniel Fojt
1737*09d4459fSDaniel Fojt static void
finalize_input(int fd,struct stat const * st,bool ineof)1738*09d4459fSDaniel Fojt finalize_input (int fd, struct stat const *st, bool ineof)
1739*09d4459fSDaniel Fojt {
1740*09d4459fSDaniel Fojt if (fd == STDIN_FILENO
1741*09d4459fSDaniel Fojt && (outleft
1742*09d4459fSDaniel Fojt ? (!ineof
1743*09d4459fSDaniel Fojt && (seek_failed
1744*09d4459fSDaniel Fojt || (lseek (fd, 0, SEEK_END) < 0
1745*09d4459fSDaniel Fojt /* Linux proc file system has EINVAL (Bug#25180). */
1746*09d4459fSDaniel Fojt && errno != EINVAL))
1747*09d4459fSDaniel Fojt && ! drain_input (fd, st))
1748*09d4459fSDaniel Fojt : (bufoffset != after_last_match && !seek_failed
1749*09d4459fSDaniel Fojt && lseek (fd, after_last_match, SEEK_SET) < 0)))
1750*09d4459fSDaniel Fojt suppressible_error (errno);
1751*09d4459fSDaniel Fojt }
1752*09d4459fSDaniel Fojt
1753dc7c36e4SJohn Marino static bool
grepdesc(int desc,bool command_line)1754dc7c36e4SJohn Marino grepdesc (int desc, bool command_line)
1755680a9cb8SJohn Marino {
1756680a9cb8SJohn Marino intmax_t count;
1757dc7c36e4SJohn Marino bool status = true;
1758*09d4459fSDaniel Fojt bool ineof = false;
1759680a9cb8SJohn Marino struct stat st;
1760680a9cb8SJohn Marino
1761680a9cb8SJohn Marino /* Get the file status, possibly for the second time. This catches
1762680a9cb8SJohn Marino a race condition if the directory entry changes after the
1763680a9cb8SJohn Marino directory entry is read and before the file is opened. For
1764680a9cb8SJohn Marino example, normally DESC is a directory only at the top level, but
1765680a9cb8SJohn Marino there is an exception if some other process substitutes a
1766680a9cb8SJohn Marino directory for a non-directory while 'grep' is running. */
1767680a9cb8SJohn Marino if (fstat (desc, &st) != 0)
1768680a9cb8SJohn Marino {
1769*09d4459fSDaniel Fojt suppressible_error (errno);
1770680a9cb8SJohn Marino goto closeout;
1771680a9cb8SJohn Marino }
1772680a9cb8SJohn Marino
1773dc7c36e4SJohn Marino if (desc != STDIN_FILENO && skip_devices (command_line)
1774dc7c36e4SJohn Marino && is_device_mode (st.st_mode))
1775dc7c36e4SJohn Marino goto closeout;
1776dc7c36e4SJohn Marino
1777680a9cb8SJohn Marino if (desc != STDIN_FILENO && command_line
1778dc7c36e4SJohn Marino && skipped_file (filename, true, S_ISDIR (st.st_mode) != 0))
1779680a9cb8SJohn Marino goto closeout;
1780680a9cb8SJohn Marino
1781*09d4459fSDaniel Fojt /* Don't output file names if invoked as 'grep -r PATTERN NONDIRECTORY'. */
1782*09d4459fSDaniel Fojt if (out_file < 0)
1783*09d4459fSDaniel Fojt out_file = !!S_ISDIR (st.st_mode);
1784*09d4459fSDaniel Fojt
1785680a9cb8SJohn Marino if (desc != STDIN_FILENO
1786680a9cb8SJohn Marino && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode))
1787680a9cb8SJohn Marino {
1788680a9cb8SJohn Marino /* Traverse the directory starting with its full name, because
1789680a9cb8SJohn Marino unfortunately fts provides no way to traverse the directory
1790680a9cb8SJohn Marino starting from its file descriptor. */
1791680a9cb8SJohn Marino
1792680a9cb8SJohn Marino FTS *fts;
1793680a9cb8SJohn Marino FTSENT *ent;
1794680a9cb8SJohn Marino int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
1795680a9cb8SJohn Marino char *fts_arg[2];
1796680a9cb8SJohn Marino
1797680a9cb8SJohn Marino /* Close DESC now, to conserve file descriptors if the race
1798680a9cb8SJohn Marino condition occurs many times in a deep recursion. */
1799680a9cb8SJohn Marino if (close (desc) != 0)
1800*09d4459fSDaniel Fojt suppressible_error (errno);
1801680a9cb8SJohn Marino
1802680a9cb8SJohn Marino fts_arg[0] = (char *) filename;
1803680a9cb8SJohn Marino fts_arg[1] = NULL;
1804680a9cb8SJohn Marino fts = fts_open (fts_arg, opts, NULL);
1805680a9cb8SJohn Marino
1806680a9cb8SJohn Marino if (!fts)
1807680a9cb8SJohn Marino xalloc_die ();
1808680a9cb8SJohn Marino while ((ent = fts_read (fts)))
1809680a9cb8SJohn Marino status &= grepdirent (fts, ent, command_line);
1810680a9cb8SJohn Marino if (errno)
1811*09d4459fSDaniel Fojt suppressible_error (errno);
1812680a9cb8SJohn Marino if (fts_close (fts) != 0)
1813*09d4459fSDaniel Fojt suppressible_error (errno);
1814680a9cb8SJohn Marino return status;
1815680a9cb8SJohn Marino }
1816680a9cb8SJohn Marino if (desc != STDIN_FILENO
1817680a9cb8SJohn Marino && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode))
1818680a9cb8SJohn Marino || ((devices == SKIP_DEVICES
1819680a9cb8SJohn Marino || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
1820680a9cb8SJohn Marino && is_device_mode (st.st_mode))))
1821680a9cb8SJohn Marino goto closeout;
1822680a9cb8SJohn Marino
1823680a9cb8SJohn Marino /* If there is a regular file on stdout and the current file refers
1824680a9cb8SJohn Marino to the same i-node, we have to report the problem and skip it.
1825680a9cb8SJohn Marino Otherwise when matching lines from some other input reach the
1826680a9cb8SJohn Marino disk before we open this file, we can end up reading and matching
1827680a9cb8SJohn Marino those lines and appending them to the file from which we're reading.
1828680a9cb8SJohn Marino Then we'd have what appears to be an infinite loop that'd terminate
1829680a9cb8SJohn Marino only upon filling the output file system or reaching a quota.
1830680a9cb8SJohn Marino However, there is no risk of an infinite loop if grep is generating
1831680a9cb8SJohn Marino no output, i.e., with --silent, --quiet, -q.
1832680a9cb8SJohn Marino Similarly, with any of these:
1833680a9cb8SJohn Marino --max-count=N (-m) (for N >= 2)
1834680a9cb8SJohn Marino --files-with-matches (-l)
1835680a9cb8SJohn Marino --files-without-match (-L)
1836680a9cb8SJohn Marino there is no risk of trouble.
1837680a9cb8SJohn Marino For --max-count=1, grep stops after printing the first match,
1838680a9cb8SJohn Marino so there is no risk of malfunction. But even --max-count=2, with
1839680a9cb8SJohn Marino input==output, while there is no risk of infloop, there is a race
1840680a9cb8SJohn Marino condition that could result in "alternate" output. */
1841*09d4459fSDaniel Fojt if (!out_quiet && list_files == LISTFILES_NONE && 1 < max_count
1842*09d4459fSDaniel Fojt && S_ISREG (st.st_mode) && SAME_INODE (st, out_stat))
1843680a9cb8SJohn Marino {
1844680a9cb8SJohn Marino if (! suppress_errors)
1845*09d4459fSDaniel Fojt error (0, 0, _("input file %s is also the output"),
1846*09d4459fSDaniel Fojt quote (input_filename ()));
1847dc7c36e4SJohn Marino errseen = true;
1848680a9cb8SJohn Marino goto closeout;
1849680a9cb8SJohn Marino }
1850680a9cb8SJohn Marino
1851*09d4459fSDaniel Fojt count = grep (desc, &st, &ineof);
1852680a9cb8SJohn Marino if (count_matches)
1853680a9cb8SJohn Marino {
1854680a9cb8SJohn Marino if (out_file)
1855680a9cb8SJohn Marino {
1856680a9cb8SJohn Marino print_filename ();
1857680a9cb8SJohn Marino if (filename_mask)
1858680a9cb8SJohn Marino print_sep (SEP_CHAR_SELECTED);
1859680a9cb8SJohn Marino else
1860*09d4459fSDaniel Fojt putchar_errno (0);
1861680a9cb8SJohn Marino }
1862*09d4459fSDaniel Fojt printf_errno ("%" PRIdMAX "\n", count);
1863*09d4459fSDaniel Fojt if (line_buffered)
1864*09d4459fSDaniel Fojt fflush_errno ();
1865680a9cb8SJohn Marino }
1866680a9cb8SJohn Marino
1867*09d4459fSDaniel Fojt status = !count == !(list_files == LISTFILES_NONMATCHING);
1868*09d4459fSDaniel Fojt
1869*09d4459fSDaniel Fojt if (list_files == LISTFILES_NONE || dev_null_output)
1870*09d4459fSDaniel Fojt finalize_input (desc, &st, ineof);
1871*09d4459fSDaniel Fojt else if (status == 0)
1872680a9cb8SJohn Marino {
1873680a9cb8SJohn Marino print_filename ();
1874*09d4459fSDaniel Fojt putchar_errno ('\n' & filename_mask);
1875*09d4459fSDaniel Fojt if (line_buffered)
1876*09d4459fSDaniel Fojt fflush_errno ();
1877680a9cb8SJohn Marino }
1878680a9cb8SJohn Marino
1879680a9cb8SJohn Marino closeout:
1880680a9cb8SJohn Marino if (desc != STDIN_FILENO && close (desc) != 0)
1881*09d4459fSDaniel Fojt suppressible_error (errno);
1882680a9cb8SJohn Marino return status;
1883680a9cb8SJohn Marino }
1884680a9cb8SJohn Marino
1885dc7c36e4SJohn Marino static bool
grep_command_line_arg(char const * arg)1886680a9cb8SJohn Marino grep_command_line_arg (char const *arg)
1887680a9cb8SJohn Marino {
1888680a9cb8SJohn Marino if (STREQ (arg, "-"))
1889680a9cb8SJohn Marino {
1890*09d4459fSDaniel Fojt filename = label;
1891*09d4459fSDaniel Fojt if (binary)
1892*09d4459fSDaniel Fojt xset_binary_mode (STDIN_FILENO, O_BINARY);
1893dc7c36e4SJohn Marino return grepdesc (STDIN_FILENO, true);
1894680a9cb8SJohn Marino }
1895680a9cb8SJohn Marino else
1896680a9cb8SJohn Marino {
1897680a9cb8SJohn Marino filename = arg;
1898dc7c36e4SJohn Marino return grepfile (AT_FDCWD, arg, true, true);
1899680a9cb8SJohn Marino }
1900680a9cb8SJohn Marino }
1901680a9cb8SJohn Marino
/* Print usage information and terminate the program with exit status
   STATUS.  When STATUS is nonzero, a one-line synopsis and a pointer
   to '--help' are written to stderr.  When STATUS is zero, the full
   help text is written to stdout, followed by the output of
   emit_bug_reporting_address.  This function does not return: it
   always ends by calling exit (STATUS).  */
1902680a9cb8SJohn Marino _Noreturn void usage (int);
1903680a9cb8SJohn Marino void
usage(int status)1904680a9cb8SJohn Marino usage (int status)
1905680a9cb8SJohn Marino {
1906680a9cb8SJohn Marino if (status != 0)
1907680a9cb8SJohn Marino {
1908*09d4459fSDaniel Fojt fprintf (stderr, _("Usage: %s [OPTION]... PATTERNS [FILE]...\n"),
1909*09d4459fSDaniel Fojt getprogname ());
1910680a9cb8SJohn Marino fprintf (stderr, _("Try '%s --help' for more information.\n"),
1911*09d4459fSDaniel Fojt getprogname ());
1912680a9cb8SJohn Marino }
1913680a9cb8SJohn Marino else
1914680a9cb8SJohn Marino {
1915*09d4459fSDaniel Fojt printf (_("Usage: %s [OPTION]... PATTERNS [FILE]...\n"), getprogname ());
1916*09d4459fSDaniel Fojt printf (_("Search for PATTERNS in each FILE.\n"));
1917680a9cb8SJohn Marino printf (_("\
1918680a9cb8SJohn Marino Example: %s -i 'hello world' menu.h main.c\n\
1919*09d4459fSDaniel Fojt PATTERNS can contain multiple patterns separated by newlines.\n\
1920680a9cb8SJohn Marino \n\
1921*09d4459fSDaniel Fojt Pattern selection and interpretation:\n"), getprogname ());
1922680a9cb8SJohn Marino printf (_("\
1923*09d4459fSDaniel Fojt -E, --extended-regexp PATTERNS are extended regular expressions\n\
1924*09d4459fSDaniel Fojt -F, --fixed-strings PATTERNS are strings\n\
1925*09d4459fSDaniel Fojt -G, --basic-regexp PATTERNS are basic regular expressions\n\
1926*09d4459fSDaniel Fojt -P, --perl-regexp PATTERNS are Perl regular expressions\n"));
1927dc7c36e4SJohn Marino /* -X is deliberately undocumented. */
1928680a9cb8SJohn Marino printf (_("\
1929*09d4459fSDaniel Fojt -e, --regexp=PATTERNS use PATTERNS for matching\n\
1930*09d4459fSDaniel Fojt -f, --file=FILE take PATTERNS from FILE\n\
1931*09d4459fSDaniel Fojt -i, --ignore-case ignore case distinctions in patterns and data\n\
1932*09d4459fSDaniel Fojt --no-ignore-case do not ignore case distinctions (default)\n\
1933*09d4459fSDaniel Fojt -w, --word-regexp match only whole words\n\
1934*09d4459fSDaniel Fojt -x, --line-regexp match only whole lines\n\
1935680a9cb8SJohn Marino -z, --null-data a data line ends in 0 byte, not newline\n"));
1936680a9cb8SJohn Marino printf (_("\
1937680a9cb8SJohn Marino \n\
1938680a9cb8SJohn Marino Miscellaneous:\n\
1939680a9cb8SJohn Marino -s, --no-messages suppress error messages\n\
1940680a9cb8SJohn Marino -v, --invert-match select non-matching lines\n\
1941680a9cb8SJohn Marino -V, --version display version information and exit\n\
1942680a9cb8SJohn Marino --help display this help text and exit\n"));
1943680a9cb8SJohn Marino printf (_("\
1944680a9cb8SJohn Marino \n\
1945680a9cb8SJohn Marino Output control:\n\
1946*09d4459fSDaniel Fojt -m, --max-count=NUM stop after NUM selected lines\n\
1947680a9cb8SJohn Marino -b, --byte-offset print the byte offset with output lines\n\
1948680a9cb8SJohn Marino -n, --line-number print line number with output lines\n\
1949680a9cb8SJohn Marino --line-buffered flush output on every line\n\
1950*09d4459fSDaniel Fojt -H, --with-filename print file name with output lines\n\
1951680a9cb8SJohn Marino -h, --no-filename suppress the file name prefix on output\n\
1952680a9cb8SJohn Marino --label=LABEL use LABEL as the standard input file name prefix\n\
1953680a9cb8SJohn Marino "));
1954680a9cb8SJohn Marino printf (_("\
1955*09d4459fSDaniel Fojt -o, --only-matching show only nonempty parts of lines that match\n\
1956680a9cb8SJohn Marino -q, --quiet, --silent suppress all normal output\n\
1957680a9cb8SJohn Marino --binary-files=TYPE assume that binary files are TYPE;\n\
1958680a9cb8SJohn Marino TYPE is 'binary', 'text', or 'without-match'\n\
1959680a9cb8SJohn Marino -a, --text equivalent to --binary-files=text\n\
1960680a9cb8SJohn Marino "));
1961680a9cb8SJohn Marino printf (_("\
1962680a9cb8SJohn Marino -I equivalent to --binary-files=without-match\n\
1963680a9cb8SJohn Marino -d, --directories=ACTION how to handle directories;\n\
1964680a9cb8SJohn Marino ACTION is 'read', 'recurse', or 'skip'\n\
1965680a9cb8SJohn Marino -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\
1966680a9cb8SJohn Marino ACTION is 'read' or 'skip'\n\
1967680a9cb8SJohn Marino -r, --recursive like --directories=recurse\n\
1968680a9cb8SJohn Marino -R, --dereference-recursive likewise, but follow all symlinks\n\
1969680a9cb8SJohn Marino "));
1970680a9cb8SJohn Marino printf (_("\
1971*09d4459fSDaniel Fojt --include=GLOB search only files that match GLOB (a file pattern)"
1972*09d4459fSDaniel Fojt "\n\
1973*09d4459fSDaniel Fojt --exclude=GLOB skip files that match GLOB\n\
1974*09d4459fSDaniel Fojt --exclude-from=FILE skip files that match any file pattern from FILE\n\
1975*09d4459fSDaniel Fojt --exclude-dir=GLOB skip directories that match GLOB\n\
1976680a9cb8SJohn Marino "));
1977680a9cb8SJohn Marino printf (_("\
1978*09d4459fSDaniel Fojt -L, --files-without-match print only names of FILEs with no selected lines\n\
1979*09d4459fSDaniel Fojt -l, --files-with-matches print only names of FILEs with selected lines\n\
1980*09d4459fSDaniel Fojt -c, --count print only a count of selected lines per FILE\n\
1981680a9cb8SJohn Marino -T, --initial-tab make tabs line up (if needed)\n\
1982680a9cb8SJohn Marino -Z, --null print 0 byte after FILE name\n"));
1983680a9cb8SJohn Marino printf (_("\
1984680a9cb8SJohn Marino \n\
1985680a9cb8SJohn Marino Context control:\n\
1986680a9cb8SJohn Marino -B, --before-context=NUM print NUM lines of leading context\n\
1987680a9cb8SJohn Marino -A, --after-context=NUM print NUM lines of trailing context\n\
1988680a9cb8SJohn Marino -C, --context=NUM print NUM lines of output context\n\
1989680a9cb8SJohn Marino "));
1990680a9cb8SJohn Marino printf (_("\
1991680a9cb8SJohn Marino -NUM same as --context=NUM\n\
1992680a9cb8SJohn Marino --color[=WHEN],\n\
1993680a9cb8SJohn Marino --colour[=WHEN] use markers to highlight the matching strings;\n\
1994680a9cb8SJohn Marino WHEN is 'always', 'never', or 'auto'\n\
1995680a9cb8SJohn Marino -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\
1996680a9cb8SJohn Marino \n"));
1997680a9cb8SJohn Marino printf (_("\
1998*09d4459fSDaniel Fojt When FILE is '-', read standard input. With no FILE, read '.' if\n\
1999*09d4459fSDaniel Fojt recursive, '-' otherwise. With fewer than two FILEs, assume -h.\n\
2000*09d4459fSDaniel Fojt Exit status is 0 if any line (or file if -L) is selected, 1 otherwise;\n\
2001680a9cb8SJohn Marino if any error occurs and -q is not given, the exit status is 2.\n"));
2002dc7c36e4SJohn Marino emit_bug_reporting_address ();
2003680a9cb8SJohn Marino }
2004680a9cb8SJohn Marino exit (status);
2005680a9cb8SJohn Marino }
2006680a9cb8SJohn Marino
2007680a9cb8SJohn Marino /* Pattern compilers and matchers. */
200895b7b453SJohn Marino
2009*09d4459fSDaniel Fojt static struct
201095b7b453SJohn Marino {
2011*09d4459fSDaniel Fojt char name[12];
2012*09d4459fSDaniel Fojt int syntax; /* used if compile == GEAcompile */
2013680a9cb8SJohn Marino compile_fp_t compile;
2014680a9cb8SJohn Marino execute_fp_t execute;
2015*09d4459fSDaniel Fojt } const matchers[] = {
2016*09d4459fSDaniel Fojt { "grep", RE_SYNTAX_GREP, GEAcompile, EGexecute },
2017*09d4459fSDaniel Fojt { "egrep", RE_SYNTAX_EGREP, GEAcompile, EGexecute },
2018*09d4459fSDaniel Fojt { "fgrep", 0, Fcompile, Fexecute, },
2019*09d4459fSDaniel Fojt { "awk", RE_SYNTAX_AWK, GEAcompile, EGexecute },
2020*09d4459fSDaniel Fojt { "gawk", RE_SYNTAX_GNU_AWK, GEAcompile, EGexecute },
2021*09d4459fSDaniel Fojt { "posixawk", RE_SYNTAX_POSIX_AWK, GEAcompile, EGexecute },
2022*09d4459fSDaniel Fojt #if HAVE_LIBPCRE
2023*09d4459fSDaniel Fojt { "perl", 0, Pcompile, Pexecute, },
2024*09d4459fSDaniel Fojt #endif
2025680a9cb8SJohn Marino };
2026*09d4459fSDaniel Fojt /* Keep these in sync with the 'matchers' table. */
2027*09d4459fSDaniel Fojt enum { E_MATCHER_INDEX = 1, F_MATCHER_INDEX = 2, G_MATCHER_INDEX = 0 };
202895b7b453SJohn Marino
2029*09d4459fSDaniel Fojt /* Return the index of the matcher corresponding to M if available.
2030*09d4459fSDaniel Fojt MATCHER is the index of the previous matcher, or -1 if none.
2031*09d4459fSDaniel Fojt Exit in case of conflicts or if M is not available. */
2032*09d4459fSDaniel Fojt static int
setmatcher(char const * m,int matcher)2033*09d4459fSDaniel Fojt setmatcher (char const *m, int matcher)
2034680a9cb8SJohn Marino {
2035*09d4459fSDaniel Fojt for (int i = 0; i < sizeof matchers / sizeof *matchers; i++)
2036*09d4459fSDaniel Fojt if (STREQ (m, matchers[i].name))
2037680a9cb8SJohn Marino {
2038*09d4459fSDaniel Fojt if (0 <= matcher && matcher != i)
2039*09d4459fSDaniel Fojt die (EXIT_TROUBLE, 0, _("conflicting matchers specified"));
2040*09d4459fSDaniel Fojt return i;
2041680a9cb8SJohn Marino }
2042680a9cb8SJohn Marino
2043*09d4459fSDaniel Fojt #if !HAVE_LIBPCRE
2044*09d4459fSDaniel Fojt if (STREQ (m, "perl"))
2045*09d4459fSDaniel Fojt die (EXIT_TROUBLE, 0,
2046*09d4459fSDaniel Fojt _("Perl matching not supported in a --disable-perl-regexp build"));
2047*09d4459fSDaniel Fojt #endif
2048*09d4459fSDaniel Fojt die (EXIT_TROUBLE, 0, _("invalid matcher %s"), m);
2049680a9cb8SJohn Marino }
2050680a9cb8SJohn Marino
/* Split OPTIONS into its white-space-separated options, storing a
   NUL-terminated copy of each one into BUF.  If ARGV is not NULL,
   set ARGV[0], ARGV[1], etc. to point at the copies; ARGV[N] is not
   set to NULL.  Return N, the number of options found.  A backslash
   in OPTIONS quotes the following byte, so it can escape white space
   and backslashes.  */
static size_t
prepend_args (char const *options, char *buf, char **argv)
{
  char const *src = options;
  char *dst = buf;
  size_t count = 0;

  while (true)
    {
      /* Skip the white space in front of the next option.  */
      for (; c_isspace (to_uchar (*src)); src++)
        continue;
      if (*src == '\0')
        break;

      if (argv != NULL)
        argv[count] = dst;
      count++;

      /* Copy the option.  A backslash followed by another byte is
         replaced by that byte; a trailing backslash is kept.  */
      do
        {
          char c = *src++;
          if (c == '\\' && *src != '\0')
            c = *src++;
          *dst++ = c;
        }
      while (*src != '\0' && ! c_isspace (to_uchar (*src)));

      *dst++ = '\0';
    }

  return count;
}
2081680a9cb8SJohn Marino
2082680a9cb8SJohn Marino /* Prepend the whitespace-separated options in OPTIONS to the argument
2083680a9cb8SJohn Marino vector of a main program with argument count *PARGC and argument
2084680a9cb8SJohn Marino vector *PARGV. Return the number of options prepended. */
2085680a9cb8SJohn Marino static int
prepend_default_options(char const * options,int * pargc,char *** pargv)2086680a9cb8SJohn Marino prepend_default_options (char const *options, int *pargc, char ***pargv)
2087680a9cb8SJohn Marino {
2088680a9cb8SJohn Marino if (options && *options)
2089680a9cb8SJohn Marino {
2090680a9cb8SJohn Marino char *buf = xmalloc (strlen (options) + 1);
2091680a9cb8SJohn Marino size_t prepended = prepend_args (options, buf, NULL);
2092680a9cb8SJohn Marino int argc = *pargc;
2093680a9cb8SJohn Marino char *const *argv = *pargv;
2094680a9cb8SJohn Marino char **pp;
2095680a9cb8SJohn Marino enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *pp - 1) };
2096680a9cb8SJohn Marino if (MAX_ARGS - argc < prepended)
2097680a9cb8SJohn Marino xalloc_die ();
2098680a9cb8SJohn Marino pp = xmalloc ((prepended + argc + 1) * sizeof *pp);
2099680a9cb8SJohn Marino *pargc = prepended + argc;
2100680a9cb8SJohn Marino *pargv = pp;
2101680a9cb8SJohn Marino *pp++ = *argv++;
2102680a9cb8SJohn Marino pp += prepend_args (options, buf, pp);
2103680a9cb8SJohn Marino while ((*pp++ = *argv++))
2104680a9cb8SJohn Marino continue;
2105680a9cb8SJohn Marino return prepended;
2106680a9cb8SJohn Marino }
2107680a9cb8SJohn Marino
2108680a9cb8SJohn Marino return 0;
2109680a9cb8SJohn Marino }
2110680a9cb8SJohn Marino
2111680a9cb8SJohn Marino /* Get the next non-digit option from ARGC and ARGV.
2112680a9cb8SJohn Marino Return -1 if there are no more options.
2113680a9cb8SJohn Marino Process any digit options that were encountered on the way,
2114680a9cb8SJohn Marino and store the resulting integer into *DEFAULT_CONTEXT. */
2115680a9cb8SJohn Marino static int
get_nondigit_option(int argc,char * const * argv,intmax_t * default_context)2116680a9cb8SJohn Marino get_nondigit_option (int argc, char *const *argv, intmax_t *default_context)
2117680a9cb8SJohn Marino {
2118680a9cb8SJohn Marino static int prev_digit_optind = -1;
2119dc7c36e4SJohn Marino int this_digit_optind;
2120dc7c36e4SJohn Marino bool was_digit;
2121680a9cb8SJohn Marino char buf[INT_BUFSIZE_BOUND (intmax_t) + 4];
2122680a9cb8SJohn Marino char *p = buf;
2123680a9cb8SJohn Marino int opt;
2124680a9cb8SJohn Marino
2125dc7c36e4SJohn Marino was_digit = false;
2126680a9cb8SJohn Marino this_digit_optind = optind;
2127dc7c36e4SJohn Marino while (true)
2128680a9cb8SJohn Marino {
2129680a9cb8SJohn Marino opt = getopt_long (argc, (char **) argv, short_options,
2130680a9cb8SJohn Marino long_options, NULL);
2131*09d4459fSDaniel Fojt if (! c_isdigit (opt))
2132680a9cb8SJohn Marino break;
2133680a9cb8SJohn Marino
2134680a9cb8SJohn Marino if (prev_digit_optind != this_digit_optind || !was_digit)
2135680a9cb8SJohn Marino {
2136680a9cb8SJohn Marino /* Reset to start another context length argument. */
2137680a9cb8SJohn Marino p = buf;
2138680a9cb8SJohn Marino }
2139680a9cb8SJohn Marino else
2140680a9cb8SJohn Marino {
2141680a9cb8SJohn Marino /* Suppress trivial leading zeros, to avoid incorrect
2142680a9cb8SJohn Marino diagnostic on strings like 00000000000. */
2143680a9cb8SJohn Marino p -= buf[0] == '0';
2144680a9cb8SJohn Marino }
2145680a9cb8SJohn Marino
2146680a9cb8SJohn Marino if (p == buf + sizeof buf - 4)
2147680a9cb8SJohn Marino {
2148680a9cb8SJohn Marino /* Too many digits. Append "..." to make context_length_arg
2149680a9cb8SJohn Marino complain about "X...", where X contains the digits seen
2150680a9cb8SJohn Marino so far. */
2151680a9cb8SJohn Marino strcpy (p, "...");
2152680a9cb8SJohn Marino p += 3;
2153680a9cb8SJohn Marino break;
2154680a9cb8SJohn Marino }
2155680a9cb8SJohn Marino *p++ = opt;
2156680a9cb8SJohn Marino
2157dc7c36e4SJohn Marino was_digit = true;
2158680a9cb8SJohn Marino prev_digit_optind = this_digit_optind;
2159680a9cb8SJohn Marino this_digit_optind = optind;
2160680a9cb8SJohn Marino }
2161680a9cb8SJohn Marino if (p != buf)
2162680a9cb8SJohn Marino {
2163680a9cb8SJohn Marino *p = '\0';
2164680a9cb8SJohn Marino context_length_arg (buf, default_context);
2165680a9cb8SJohn Marino }
2166680a9cb8SJohn Marino
2167680a9cb8SJohn Marino return opt;
2168680a9cb8SJohn Marino }
2169680a9cb8SJohn Marino
2170680a9cb8SJohn Marino /* Parse GREP_COLORS. The default would look like:
2171680a9cb8SJohn Marino GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36'
2172680a9cb8SJohn Marino with boolean capabilities (ne and rv) unset (i.e., omitted).
2173680a9cb8SJohn Marino No character escaping is needed or supported. */
2174680a9cb8SJohn Marino static void
parse_grep_colors(void)2175680a9cb8SJohn Marino parse_grep_colors (void)
2176680a9cb8SJohn Marino {
2177680a9cb8SJohn Marino const char *p;
2178680a9cb8SJohn Marino char *q;
2179680a9cb8SJohn Marino char *name;
2180680a9cb8SJohn Marino char *val;
2181680a9cb8SJohn Marino
2182680a9cb8SJohn Marino p = getenv ("GREP_COLORS"); /* Plural! */
2183680a9cb8SJohn Marino if (p == NULL || *p == '\0')
2184680a9cb8SJohn Marino return;
2185680a9cb8SJohn Marino
2186680a9cb8SJohn Marino /* Work off a writable copy. */
2187680a9cb8SJohn Marino q = xstrdup (p);
2188680a9cb8SJohn Marino
2189680a9cb8SJohn Marino name = q;
2190680a9cb8SJohn Marino val = NULL;
2191680a9cb8SJohn Marino /* From now on, be well-formed or you're gone. */
2192680a9cb8SJohn Marino for (;;)
2193680a9cb8SJohn Marino if (*q == ':' || *q == '\0')
2194680a9cb8SJohn Marino {
2195680a9cb8SJohn Marino char c = *q;
2196680a9cb8SJohn Marino struct color_cap const *cap;
2197680a9cb8SJohn Marino
2198680a9cb8SJohn Marino *q++ = '\0'; /* Terminate name or val. */
2199680a9cb8SJohn Marino /* Empty name without val (empty cap)
2200680a9cb8SJohn Marino * won't match and will be ignored. */
2201680a9cb8SJohn Marino for (cap = color_dict; cap->name; cap++)
2202680a9cb8SJohn Marino if (STREQ (cap->name, name))
2203680a9cb8SJohn Marino break;
2204680a9cb8SJohn Marino /* If name unknown, go on for forward compatibility. */
2205680a9cb8SJohn Marino if (cap->var && val)
2206680a9cb8SJohn Marino *(cap->var) = val;
2207680a9cb8SJohn Marino if (cap->fct)
2208680a9cb8SJohn Marino cap->fct ();
2209680a9cb8SJohn Marino if (c == '\0')
2210680a9cb8SJohn Marino return;
2211680a9cb8SJohn Marino name = q;
2212680a9cb8SJohn Marino val = NULL;
2213680a9cb8SJohn Marino }
2214680a9cb8SJohn Marino else if (*q == '=')
2215680a9cb8SJohn Marino {
2216680a9cb8SJohn Marino if (q == name || val)
2217680a9cb8SJohn Marino return;
2218680a9cb8SJohn Marino *q++ = '\0'; /* Terminate name. */
2219680a9cb8SJohn Marino val = q; /* Can be the empty string. */
2220680a9cb8SJohn Marino }
2221680a9cb8SJohn Marino else if (val == NULL)
2222680a9cb8SJohn Marino q++; /* Accumulate name. */
2223*09d4459fSDaniel Fojt else if (*q == ';' || c_isdigit (*q))
2224680a9cb8SJohn Marino q++; /* Accumulate val. Protect the terminal from being sent crap. */
2225680a9cb8SJohn Marino else
2226680a9cb8SJohn Marino return;
2227680a9cb8SJohn Marino }
2228680a9cb8SJohn Marino
/* Return true if the PATLEN bytes starting at PAT contain an
   encoding error, i.e., if mb_clen reports (size_t) -1 or
   (size_t) -2 for some character in them.  */
static bool
contains_encoding_error (char const *pat, size_t patlen)
{
  mbstate_t state = { 0 };
  size_t offset = 0;

  while (offset < patlen)
    {
      size_t clen = mb_clen (pat + offset, patlen - offset, &state);
      if (clen == (size_t) -1 || clen == (size_t) -2)
        return true;
      offset += clen;
    }

  return false;
}
2244680a9cb8SJohn Marino
2245*09d4459fSDaniel Fojt /* Return the number of bytes in the initial character of PAT, of size
2246*09d4459fSDaniel Fojt PATLEN, if Fcompile can handle that character. Return -1 if
2247*09d4459fSDaniel Fojt Fcompile cannot handle it. MBS is the multibyte conversion state.
2248*09d4459fSDaniel Fojt
2249*09d4459fSDaniel Fojt Fcompile can handle a character C if C is single-byte, or if C has no
2250*09d4459fSDaniel Fojt case folded counterparts and toupper translates none of its bytes. */
2251*09d4459fSDaniel Fojt
2252*09d4459fSDaniel Fojt static int
fgrep_icase_charlen(char const * pat,size_t patlen,mbstate_t * mbs)2253*09d4459fSDaniel Fojt fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
2254680a9cb8SJohn Marino {
2255*09d4459fSDaniel Fojt int n = localeinfo.sbclen[to_uchar (*pat)];
2256*09d4459fSDaniel Fojt if (n < 0)
2257*09d4459fSDaniel Fojt {
2258*09d4459fSDaniel Fojt wchar_t wc;
2259*09d4459fSDaniel Fojt wchar_t folded[CASE_FOLDED_BUFSIZE];
2260*09d4459fSDaniel Fojt size_t wn = mbrtowc (&wc, pat, patlen, mbs);
2261*09d4459fSDaniel Fojt if (MB_LEN_MAX < wn || case_folded_counterparts (wc, folded))
2262*09d4459fSDaniel Fojt return -1;
2263*09d4459fSDaniel Fojt for (int i = wn; 0 < --i; )
2264*09d4459fSDaniel Fojt {
2265*09d4459fSDaniel Fojt unsigned char c = pat[i];
2266*09d4459fSDaniel Fojt if (toupper (c) != c)
2267*09d4459fSDaniel Fojt return -1;
2268*09d4459fSDaniel Fojt }
2269*09d4459fSDaniel Fojt n = wn;
2270*09d4459fSDaniel Fojt }
2271*09d4459fSDaniel Fojt return n;
2272*09d4459fSDaniel Fojt }
2273*09d4459fSDaniel Fojt
/* Return true if the -F patterns PAT, of size PATLEN, contain only
   single-byte characters or characters not subject to case folding,
   and so can be processed by Fcompile.  */

static bool
fgrep_icase_available (char const *pat, size_t patlen)
{
  mbstate_t state = { 0 };
  size_t done = 0;

  while (done < patlen)
    {
      int clen = fgrep_icase_charlen (pat + done, patlen - done, &state);
      if (clen < 0)
        return false;
      done += clen;
    }

  return true;
}
2293*09d4459fSDaniel Fojt
/* Change the pattern *KEYS_P, of size *LEN_P, from fgrep to grep
   style.  The old buffer is freed, and *KEYS_P and *LEN_P are
   updated to describe a newly allocated replacement.  */

void
fgrep_to_grep_pattern (char **keys_p, size_t *len_p)
{
  char const *src = *keys_p;
  size_t remaining = *len_p;
  mbstate_t state = { 0 };
  /* Allocate two output bytes per input byte (plus slack), since
     every input byte may need a backslash in front of it.  */
  char *converted = xnmalloc (remaining + 1, 2);
  char *dst = converted;

  while (remaining != 0)
    {
      size_t clen = mb_clen (src, remaining, &state);
      bool verbatim;

      if (clen == (size_t) -2)
        {
          /* Incomplete character: copy the rest unchanged.  */
          clen = remaining;
          verbatim = true;
        }
      else if (clen == (size_t) -1)
        {
          /* Encoding error: restart the conversion state and treat
             the offending byte like a single-byte character.  */
          memset (&state, 0, sizeof state);
          clen = 1;
          verbatim = false;
        }
      else
        verbatim = clen != 1;

      if (verbatim)
        dst = mempcpy (dst, src, clen);
      else
        {
          char c = *src;
          bool special = (c == '$' || c == '*' || c == '.'
                          || c == '[' || c == '\\' || c == '^');
          if (special)
            *dst++ = '\\';
          *dst++ = c;
        }

      src += clen;
      remaining -= clen;
    }

  free (*keys_p);
  *keys_p = converted;
  *len_p = dst - converted;
}
2337*09d4459fSDaniel Fojt
2338*09d4459fSDaniel Fojt /* If it is easy, convert the MATCHER-style patterns KEYS (of size
2339*09d4459fSDaniel Fojt *LEN_P) to -F style, update *LEN_P to a possibly-smaller value, and
2340*09d4459fSDaniel Fojt return F_MATCHER_INDEX. If not, leave KEYS and *LEN_P alone and
2341*09d4459fSDaniel Fojt return MATCHER. This function is conservative and sometimes misses
2342*09d4459fSDaniel Fojt conversions, e.g., it does not convert the -E pattern "(a|a|[aa])"
2343*09d4459fSDaniel Fojt to the -F pattern "a". */
2344*09d4459fSDaniel Fojt
2345*09d4459fSDaniel Fojt static int
try_fgrep_pattern(int matcher,char * keys,size_t * len_p)2346*09d4459fSDaniel Fojt try_fgrep_pattern (int matcher, char *keys, size_t *len_p)
2347*09d4459fSDaniel Fojt {
2348*09d4459fSDaniel Fojt int result = matcher;
2349*09d4459fSDaniel Fojt size_t len = *len_p;
2350*09d4459fSDaniel Fojt char *new_keys = xmalloc (len + 1);
2351*09d4459fSDaniel Fojt char *p = new_keys;
2352*09d4459fSDaniel Fojt char const *q = keys;
2353*09d4459fSDaniel Fojt mbstate_t mb_state = { 0 };
2354*09d4459fSDaniel Fojt
2355*09d4459fSDaniel Fojt while (len != 0)
2356*09d4459fSDaniel Fojt {
2357*09d4459fSDaniel Fojt switch (*q)
2358*09d4459fSDaniel Fojt {
2359*09d4459fSDaniel Fojt case '$': case '*': case '.': case '[': case '^':
2360*09d4459fSDaniel Fojt goto fail;
2361*09d4459fSDaniel Fojt
2362*09d4459fSDaniel Fojt case '(': case '+': case '?': case '{': case '|':
2363*09d4459fSDaniel Fojt if (matcher != G_MATCHER_INDEX)
2364*09d4459fSDaniel Fojt goto fail;
2365*09d4459fSDaniel Fojt break;
2366*09d4459fSDaniel Fojt
2367*09d4459fSDaniel Fojt case '\\':
2368*09d4459fSDaniel Fojt if (1 < len)
2369*09d4459fSDaniel Fojt switch (q[1])
2370*09d4459fSDaniel Fojt {
2371*09d4459fSDaniel Fojt case '\n':
2372*09d4459fSDaniel Fojt case 'B': case 'S': case 'W': case'\'': case '<':
2373*09d4459fSDaniel Fojt case 'b': case 's': case 'w': case '`': case '>':
2374*09d4459fSDaniel Fojt case '1': case '2': case '3': case '4':
2375*09d4459fSDaniel Fojt case '5': case '6': case '7': case '8': case '9':
2376*09d4459fSDaniel Fojt goto fail;
2377*09d4459fSDaniel Fojt
2378*09d4459fSDaniel Fojt case '(': case '+': case '?': case '{': case '|':
2379*09d4459fSDaniel Fojt if (matcher == G_MATCHER_INDEX)
2380*09d4459fSDaniel Fojt goto fail;
2381*09d4459fSDaniel Fojt FALLTHROUGH;
2382*09d4459fSDaniel Fojt default:
2383*09d4459fSDaniel Fojt q++, len--;
2384*09d4459fSDaniel Fojt break;
2385*09d4459fSDaniel Fojt }
2386*09d4459fSDaniel Fojt break;
2387*09d4459fSDaniel Fojt }
2388*09d4459fSDaniel Fojt
2389*09d4459fSDaniel Fojt {
2390*09d4459fSDaniel Fojt size_t n;
2391*09d4459fSDaniel Fojt if (match_icase)
2392*09d4459fSDaniel Fojt {
2393*09d4459fSDaniel Fojt int ni = fgrep_icase_charlen (q, len, &mb_state);
2394*09d4459fSDaniel Fojt if (ni < 0)
2395*09d4459fSDaniel Fojt goto fail;
2396*09d4459fSDaniel Fojt n = ni;
2397*09d4459fSDaniel Fojt }
2398*09d4459fSDaniel Fojt else
2399*09d4459fSDaniel Fojt {
2400*09d4459fSDaniel Fojt n = mb_clen (q, len, &mb_state);
2401*09d4459fSDaniel Fojt if (MB_LEN_MAX < n)
2402*09d4459fSDaniel Fojt goto fail;
2403*09d4459fSDaniel Fojt }
2404*09d4459fSDaniel Fojt
2405*09d4459fSDaniel Fojt p = mempcpy (p, q, n);
2406*09d4459fSDaniel Fojt q += n;
2407*09d4459fSDaniel Fojt len -= n;
2408*09d4459fSDaniel Fojt }
2409*09d4459fSDaniel Fojt }
2410*09d4459fSDaniel Fojt
2411*09d4459fSDaniel Fojt if (*len_p != p - new_keys)
2412*09d4459fSDaniel Fojt {
2413*09d4459fSDaniel Fojt *len_p = p - new_keys;
2414*09d4459fSDaniel Fojt memcpy (keys, new_keys, p - new_keys);
2415*09d4459fSDaniel Fojt }
2416*09d4459fSDaniel Fojt result = F_MATCHER_INDEX;
2417*09d4459fSDaniel Fojt
2418*09d4459fSDaniel Fojt fail:
2419*09d4459fSDaniel Fojt free (new_keys);
2420*09d4459fSDaniel Fojt return result;
2421680a9cb8SJohn Marino }
2422680a9cb8SJohn Marino
2423680a9cb8SJohn Marino int
main(int argc,char ** argv)2424680a9cb8SJohn Marino main (int argc, char **argv)
2425680a9cb8SJohn Marino {
2426*09d4459fSDaniel Fojt char *keys = NULL;
2427*09d4459fSDaniel Fojt size_t keycc = 0, oldcc, keyalloc = 0;
2428*09d4459fSDaniel Fojt int matcher = -1;
2429680a9cb8SJohn Marino size_t cc;
2430dc7c36e4SJohn Marino int opt, prepended;
2431680a9cb8SJohn Marino int prev_optind, last_recursive;
2432680a9cb8SJohn Marino int fread_errno;
2433680a9cb8SJohn Marino intmax_t default_context;
2434680a9cb8SJohn Marino FILE *fp;
2435680a9cb8SJohn Marino exit_failure = EXIT_TROUBLE;
2436680a9cb8SJohn Marino initialize_main (&argc, &argv);
2437680a9cb8SJohn Marino
2438*09d4459fSDaniel Fojt /* Which command-line options have been specified for filename output.
2439*09d4459fSDaniel Fojt -1 for -h, 1 for -H, 0 for neither. */
2440*09d4459fSDaniel Fojt int filename_option = 0;
2441*09d4459fSDaniel Fojt
2442680a9cb8SJohn Marino eolbyte = '\n';
2443680a9cb8SJohn Marino filename_mask = ~0;
2444680a9cb8SJohn Marino
2445680a9cb8SJohn Marino max_count = INTMAX_MAX;
2446680a9cb8SJohn Marino
2447680a9cb8SJohn Marino /* The value -1 means to use DEFAULT_CONTEXT. */
2448680a9cb8SJohn Marino out_after = out_before = -1;
2449680a9cb8SJohn Marino /* Default before/after context: changed by -C/-NUM options */
2450680a9cb8SJohn Marino default_context = -1;
2451680a9cb8SJohn Marino /* Changed by -o option */
2452dc7c36e4SJohn Marino only_matching = false;
2453680a9cb8SJohn Marino
2454680a9cb8SJohn Marino /* Internationalization. */
2455680a9cb8SJohn Marino #if defined HAVE_SETLOCALE
2456680a9cb8SJohn Marino setlocale (LC_ALL, "");
2457680a9cb8SJohn Marino #endif
2458680a9cb8SJohn Marino #if defined ENABLE_NLS
2459680a9cb8SJohn Marino bindtextdomain (PACKAGE, LOCALEDIR);
2460680a9cb8SJohn Marino textdomain (PACKAGE);
2461680a9cb8SJohn Marino #endif
2462680a9cb8SJohn Marino
2463*09d4459fSDaniel Fojt init_localeinfo (&localeinfo);
2464*09d4459fSDaniel Fojt
2465680a9cb8SJohn Marino atexit (clean_up_stdout);
2466*09d4459fSDaniel Fojt c_stack_action (NULL);
2467680a9cb8SJohn Marino
2468680a9cb8SJohn Marino last_recursive = 0;
2469dc7c36e4SJohn Marino
2470680a9cb8SJohn Marino prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
2471dc7c36e4SJohn Marino if (prepended)
2472dc7c36e4SJohn Marino error (0, 0, _("warning: GREP_OPTIONS is deprecated;"
2473dc7c36e4SJohn Marino " please use an alias or script"));
2474dc7c36e4SJohn Marino
2475680a9cb8SJohn Marino while (prev_optind = optind,
2476680a9cb8SJohn Marino (opt = get_nondigit_option (argc, argv, &default_context)) != -1)
2477680a9cb8SJohn Marino switch (opt)
2478680a9cb8SJohn Marino {
2479680a9cb8SJohn Marino case 'A':
2480680a9cb8SJohn Marino context_length_arg (optarg, &out_after);
2481680a9cb8SJohn Marino break;
2482680a9cb8SJohn Marino
2483680a9cb8SJohn Marino case 'B':
2484680a9cb8SJohn Marino context_length_arg (optarg, &out_before);
2485680a9cb8SJohn Marino break;
2486680a9cb8SJohn Marino
2487680a9cb8SJohn Marino case 'C':
2488680a9cb8SJohn Marino /* Set output match context, but let any explicit leading or
2489680a9cb8SJohn Marino trailing amount specified with -A or -B stand. */
2490680a9cb8SJohn Marino context_length_arg (optarg, &default_context);
2491680a9cb8SJohn Marino break;
2492680a9cb8SJohn Marino
2493680a9cb8SJohn Marino case 'D':
2494680a9cb8SJohn Marino if (STREQ (optarg, "read"))
2495680a9cb8SJohn Marino devices = READ_DEVICES;
2496680a9cb8SJohn Marino else if (STREQ (optarg, "skip"))
2497680a9cb8SJohn Marino devices = SKIP_DEVICES;
2498680a9cb8SJohn Marino else
2499*09d4459fSDaniel Fojt die (EXIT_TROUBLE, 0, _("unknown devices method"));
2500680a9cb8SJohn Marino break;
2501680a9cb8SJohn Marino
2502680a9cb8SJohn Marino case 'E':
2503*09d4459fSDaniel Fojt matcher = setmatcher ("egrep", matcher);
2504680a9cb8SJohn Marino break;
2505680a9cb8SJohn Marino
2506680a9cb8SJohn Marino case 'F':
2507*09d4459fSDaniel Fojt matcher = setmatcher ("fgrep", matcher);
2508680a9cb8SJohn Marino break;
2509680a9cb8SJohn Marino
2510680a9cb8SJohn Marino case 'P':
2511*09d4459fSDaniel Fojt matcher = setmatcher ("perl", matcher);
2512680a9cb8SJohn Marino break;
2513680a9cb8SJohn Marino
2514680a9cb8SJohn Marino case 'G':
2515*09d4459fSDaniel Fojt matcher = setmatcher ("grep", matcher);
2516680a9cb8SJohn Marino break;
2517680a9cb8SJohn Marino
2518680a9cb8SJohn Marino case 'X': /* undocumented on purpose */
2519*09d4459fSDaniel Fojt matcher = setmatcher (optarg, matcher);
2520680a9cb8SJohn Marino break;
2521680a9cb8SJohn Marino
2522680a9cb8SJohn Marino case 'H':
2523*09d4459fSDaniel Fojt filename_option = 1;
2524680a9cb8SJohn Marino break;
2525680a9cb8SJohn Marino
2526680a9cb8SJohn Marino case 'I':
2527680a9cb8SJohn Marino binary_files = WITHOUT_MATCH_BINARY_FILES;
2528680a9cb8SJohn Marino break;
2529680a9cb8SJohn Marino
2530680a9cb8SJohn Marino case 'T':
2531dc7c36e4SJohn Marino align_tabs = true;
2532680a9cb8SJohn Marino break;
2533680a9cb8SJohn Marino
2534680a9cb8SJohn Marino case 'U':
2535*09d4459fSDaniel Fojt if (O_BINARY)
2536*09d4459fSDaniel Fojt binary = true;
2537680a9cb8SJohn Marino break;
2538680a9cb8SJohn Marino
2539680a9cb8SJohn Marino case 'u':
2540*09d4459fSDaniel Fojt /* Obsolete option; it has no effect. FIXME: Diagnose use of
2541*09d4459fSDaniel Fojt this option starting in (say) the year 2020. */
2542680a9cb8SJohn Marino break;
2543680a9cb8SJohn Marino
2544680a9cb8SJohn Marino case 'V':
2545dc7c36e4SJohn Marino show_version = true;
2546680a9cb8SJohn Marino break;
2547680a9cb8SJohn Marino
2548680a9cb8SJohn Marino case 'a':
2549680a9cb8SJohn Marino binary_files = TEXT_BINARY_FILES;
2550680a9cb8SJohn Marino break;
2551680a9cb8SJohn Marino
2552680a9cb8SJohn Marino case 'b':
2553dc7c36e4SJohn Marino out_byte = true;
2554680a9cb8SJohn Marino break;
2555680a9cb8SJohn Marino
2556680a9cb8SJohn Marino case 'c':
2557dc7c36e4SJohn Marino count_matches = true;
2558680a9cb8SJohn Marino break;
2559680a9cb8SJohn Marino
2560680a9cb8SJohn Marino case 'd':
2561680a9cb8SJohn Marino directories = XARGMATCH ("--directories", optarg,
2562680a9cb8SJohn Marino directories_args, directories_types);
2563680a9cb8SJohn Marino if (directories == RECURSE_DIRECTORIES)
2564680a9cb8SJohn Marino last_recursive = prev_optind;
2565680a9cb8SJohn Marino break;
2566680a9cb8SJohn Marino
2567680a9cb8SJohn Marino case 'e':
2568680a9cb8SJohn Marino cc = strlen (optarg);
2569*09d4459fSDaniel Fojt if (keyalloc < keycc + cc + 1)
2570*09d4459fSDaniel Fojt {
2571*09d4459fSDaniel Fojt keyalloc = keycc + cc + 1;
2572*09d4459fSDaniel Fojt keys = x2realloc (keys, &keyalloc);
2573*09d4459fSDaniel Fojt }
2574*09d4459fSDaniel Fojt oldcc = keycc;
2575*09d4459fSDaniel Fojt memcpy (keys + oldcc, optarg, cc);
2576680a9cb8SJohn Marino keycc += cc;
2577680a9cb8SJohn Marino keys[keycc++] = '\n';
2578*09d4459fSDaniel Fojt fl_add (keys + oldcc, cc + 1, "");
2579680a9cb8SJohn Marino break;
2580680a9cb8SJohn Marino
2581680a9cb8SJohn Marino case 'f':
2582*09d4459fSDaniel Fojt if (STREQ (optarg, "-"))
2583680a9cb8SJohn Marino {
2584*09d4459fSDaniel Fojt if (binary)
2585*09d4459fSDaniel Fojt xset_binary_mode (STDIN_FILENO, O_BINARY);
2586*09d4459fSDaniel Fojt fp = stdin;
2587*09d4459fSDaniel Fojt }
2588*09d4459fSDaniel Fojt else
2589*09d4459fSDaniel Fojt {
2590*09d4459fSDaniel Fojt fp = fopen (optarg, binary ? "rb" : "r");
2591*09d4459fSDaniel Fojt if (!fp)
2592*09d4459fSDaniel Fojt die (EXIT_TROUBLE, errno, "%s", optarg);
2593*09d4459fSDaniel Fojt }
2594*09d4459fSDaniel Fojt oldcc = keycc;
2595*09d4459fSDaniel Fojt for (;; keycc += cc)
2596*09d4459fSDaniel Fojt {
2597*09d4459fSDaniel Fojt if (keyalloc <= keycc + 1)
2598*09d4459fSDaniel Fojt keys = x2realloc (keys, &keyalloc);
2599*09d4459fSDaniel Fojt cc = fread (keys + keycc, 1, keyalloc - (keycc + 1), fp);
2600*09d4459fSDaniel Fojt if (cc == 0)
2601*09d4459fSDaniel Fojt break;
2602680a9cb8SJohn Marino }
2603680a9cb8SJohn Marino fread_errno = errno;
2604680a9cb8SJohn Marino if (ferror (fp))
2605*09d4459fSDaniel Fojt die (EXIT_TROUBLE, fread_errno, "%s", optarg);
2606680a9cb8SJohn Marino if (fp != stdin)
2607680a9cb8SJohn Marino fclose (fp);
2608680a9cb8SJohn Marino /* Append final newline if file ended in non-newline. */
2609680a9cb8SJohn Marino if (oldcc != keycc && keys[keycc - 1] != '\n')
2610680a9cb8SJohn Marino keys[keycc++] = '\n';
2611*09d4459fSDaniel Fojt fl_add (keys + oldcc, keycc - oldcc, optarg);
2612680a9cb8SJohn Marino break;
2613680a9cb8SJohn Marino
2614680a9cb8SJohn Marino case 'h':
2615*09d4459fSDaniel Fojt filename_option = -1;
2616680a9cb8SJohn Marino break;
2617680a9cb8SJohn Marino
2618680a9cb8SJohn Marino case 'i':
2619680a9cb8SJohn Marino case 'y': /* For old-timers . . . */
2620dc7c36e4SJohn Marino match_icase = true;
2621680a9cb8SJohn Marino break;
2622680a9cb8SJohn Marino
2623*09d4459fSDaniel Fojt case NO_IGNORE_CASE_OPTION:
2624*09d4459fSDaniel Fojt match_icase = false;
2625*09d4459fSDaniel Fojt break;
2626*09d4459fSDaniel Fojt
2627680a9cb8SJohn Marino case 'L':
2628680a9cb8SJohn Marino /* Like -l, except list files that don't contain matches.
2629680a9cb8SJohn Marino Inspired by the same option in Hume's gre. */
2630*09d4459fSDaniel Fojt list_files = LISTFILES_NONMATCHING;
2631680a9cb8SJohn Marino break;
2632680a9cb8SJohn Marino
2633680a9cb8SJohn Marino case 'l':
2634*09d4459fSDaniel Fojt list_files = LISTFILES_MATCHING;
2635680a9cb8SJohn Marino break;
2636680a9cb8SJohn Marino
2637680a9cb8SJohn Marino case 'm':
2638680a9cb8SJohn Marino switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
2639680a9cb8SJohn Marino {
2640680a9cb8SJohn Marino case LONGINT_OK:
2641680a9cb8SJohn Marino case LONGINT_OVERFLOW:
2642680a9cb8SJohn Marino break;
2643680a9cb8SJohn Marino
2644680a9cb8SJohn Marino default:
2645*09d4459fSDaniel Fojt die (EXIT_TROUBLE, 0, _("invalid max count"));
2646680a9cb8SJohn Marino }
2647680a9cb8SJohn Marino break;
2648680a9cb8SJohn Marino
2649680a9cb8SJohn Marino case 'n':
2650dc7c36e4SJohn Marino out_line = true;
2651680a9cb8SJohn Marino break;
2652680a9cb8SJohn Marino
2653680a9cb8SJohn Marino case 'o':
2654dc7c36e4SJohn Marino only_matching = true;
2655680a9cb8SJohn Marino break;
2656680a9cb8SJohn Marino
2657680a9cb8SJohn Marino case 'q':
2658dc7c36e4SJohn Marino exit_on_match = true;
2659680a9cb8SJohn Marino exit_failure = 0;
2660680a9cb8SJohn Marino break;
2661680a9cb8SJohn Marino
2662680a9cb8SJohn Marino case 'R':
2663680a9cb8SJohn Marino fts_options = basic_fts_options | FTS_LOGICAL;
2664*09d4459fSDaniel Fojt FALLTHROUGH;
2665680a9cb8SJohn Marino case 'r':
2666680a9cb8SJohn Marino directories = RECURSE_DIRECTORIES;
2667680a9cb8SJohn Marino last_recursive = prev_optind;
2668680a9cb8SJohn Marino break;
2669680a9cb8SJohn Marino
2670680a9cb8SJohn Marino case 's':
2671dc7c36e4SJohn Marino suppress_errors = true;
2672680a9cb8SJohn Marino break;
2673680a9cb8SJohn Marino
2674680a9cb8SJohn Marino case 'v':
2675680a9cb8SJohn Marino out_invert = true;
2676680a9cb8SJohn Marino break;
2677680a9cb8SJohn Marino
2678680a9cb8SJohn Marino case 'w':
2679*09d4459fSDaniel Fojt wordinit ();
2680dc7c36e4SJohn Marino match_words = true;
2681680a9cb8SJohn Marino break;
2682680a9cb8SJohn Marino
2683680a9cb8SJohn Marino case 'x':
2684dc7c36e4SJohn Marino match_lines = true;
2685680a9cb8SJohn Marino break;
2686680a9cb8SJohn Marino
2687680a9cb8SJohn Marino case 'Z':
2688680a9cb8SJohn Marino filename_mask = 0;
2689680a9cb8SJohn Marino break;
2690680a9cb8SJohn Marino
2691680a9cb8SJohn Marino case 'z':
2692680a9cb8SJohn Marino eolbyte = '\0';
2693680a9cb8SJohn Marino break;
2694680a9cb8SJohn Marino
2695680a9cb8SJohn Marino case BINARY_FILES_OPTION:
2696680a9cb8SJohn Marino if (STREQ (optarg, "binary"))
2697680a9cb8SJohn Marino binary_files = BINARY_BINARY_FILES;
2698680a9cb8SJohn Marino else if (STREQ (optarg, "text"))
2699680a9cb8SJohn Marino binary_files = TEXT_BINARY_FILES;
2700680a9cb8SJohn Marino else if (STREQ (optarg, "without-match"))
2701680a9cb8SJohn Marino binary_files = WITHOUT_MATCH_BINARY_FILES;
2702680a9cb8SJohn Marino else
2703*09d4459fSDaniel Fojt die (EXIT_TROUBLE, 0, _("unknown binary-files type"));
2704680a9cb8SJohn Marino break;
2705680a9cb8SJohn Marino
2706680a9cb8SJohn Marino case COLOR_OPTION:
2707680a9cb8SJohn Marino if (optarg)
2708680a9cb8SJohn Marino {
2709*09d4459fSDaniel Fojt if (!c_strcasecmp (optarg, "always")
2710*09d4459fSDaniel Fojt || !c_strcasecmp (optarg, "yes")
2711*09d4459fSDaniel Fojt || !c_strcasecmp (optarg, "force"))
2712680a9cb8SJohn Marino color_option = 1;
2713*09d4459fSDaniel Fojt else if (!c_strcasecmp (optarg, "never")
2714*09d4459fSDaniel Fojt || !c_strcasecmp (optarg, "no")
2715*09d4459fSDaniel Fojt || !c_strcasecmp (optarg, "none"))
2716680a9cb8SJohn Marino color_option = 0;
2717*09d4459fSDaniel Fojt else if (!c_strcasecmp (optarg, "auto")
2718*09d4459fSDaniel Fojt || !c_strcasecmp (optarg, "tty")
2719*09d4459fSDaniel Fojt || !c_strcasecmp (optarg, "if-tty"))
2720680a9cb8SJohn Marino color_option = 2;
2721680a9cb8SJohn Marino else
2722680a9cb8SJohn Marino show_help = 1;
2723680a9cb8SJohn Marino }
2724680a9cb8SJohn Marino else
2725680a9cb8SJohn Marino color_option = 2;
2726680a9cb8SJohn Marino break;
2727680a9cb8SJohn Marino
2728680a9cb8SJohn Marino case EXCLUDE_OPTION:
2729680a9cb8SJohn Marino case INCLUDE_OPTION:
2730*09d4459fSDaniel Fojt for (int cmd = 0; cmd < 2; cmd++)
2731*09d4459fSDaniel Fojt {
2732*09d4459fSDaniel Fojt if (!excluded_patterns[cmd])
2733*09d4459fSDaniel Fojt excluded_patterns[cmd] = new_exclude ();
2734*09d4459fSDaniel Fojt add_exclude (excluded_patterns[cmd], optarg,
2735*09d4459fSDaniel Fojt ((opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0)
2736*09d4459fSDaniel Fojt | exclude_options (cmd)));
2737*09d4459fSDaniel Fojt }
2738680a9cb8SJohn Marino break;
2739680a9cb8SJohn Marino case EXCLUDE_FROM_OPTION:
2740*09d4459fSDaniel Fojt for (int cmd = 0; cmd < 2; cmd++)
2741680a9cb8SJohn Marino {
2742*09d4459fSDaniel Fojt if (!excluded_patterns[cmd])
2743*09d4459fSDaniel Fojt excluded_patterns[cmd] = new_exclude ();
2744*09d4459fSDaniel Fojt if (add_exclude_file (add_exclude, excluded_patterns[cmd],
2745*09d4459fSDaniel Fojt optarg, exclude_options (cmd), '\n')
2746*09d4459fSDaniel Fojt != 0)
2747*09d4459fSDaniel Fojt die (EXIT_TROUBLE, errno, "%s", optarg);
2748680a9cb8SJohn Marino }
2749680a9cb8SJohn Marino break;
2750680a9cb8SJohn Marino
2751680a9cb8SJohn Marino case EXCLUDE_DIRECTORY_OPTION:
2752680a9cb8SJohn Marino strip_trailing_slashes (optarg);
2753*09d4459fSDaniel Fojt for (int cmd = 0; cmd < 2; cmd++)
2754*09d4459fSDaniel Fojt {
2755*09d4459fSDaniel Fojt if (!excluded_directory_patterns[cmd])
2756*09d4459fSDaniel Fojt excluded_directory_patterns[cmd] = new_exclude ();
2757*09d4459fSDaniel Fojt add_exclude (excluded_directory_patterns[cmd], optarg,
2758*09d4459fSDaniel Fojt exclude_options (cmd));
2759*09d4459fSDaniel Fojt }
2760680a9cb8SJohn Marino break;
2761680a9cb8SJohn Marino
2762680a9cb8SJohn Marino case GROUP_SEPARATOR_OPTION:
2763680a9cb8SJohn Marino group_separator = optarg;
2764680a9cb8SJohn Marino break;
2765680a9cb8SJohn Marino
2766680a9cb8SJohn Marino case LINE_BUFFERED_OPTION:
2767dc7c36e4SJohn Marino line_buffered = true;
2768680a9cb8SJohn Marino break;
2769680a9cb8SJohn Marino
2770680a9cb8SJohn Marino case LABEL_OPTION:
2771680a9cb8SJohn Marino label = optarg;
2772680a9cb8SJohn Marino break;
2773680a9cb8SJohn Marino
2774680a9cb8SJohn Marino case 0:
2775680a9cb8SJohn Marino /* long options */
2776680a9cb8SJohn Marino break;
2777680a9cb8SJohn Marino
2778680a9cb8SJohn Marino default:
2779680a9cb8SJohn Marino usage (EXIT_TROUBLE);
2780680a9cb8SJohn Marino break;
2781680a9cb8SJohn Marino
2782680a9cb8SJohn Marino }
2783680a9cb8SJohn Marino
2784680a9cb8SJohn Marino if (show_version)
2785680a9cb8SJohn Marino {
2786*09d4459fSDaniel Fojt version_etc (stdout, getprogname (), PACKAGE_NAME, VERSION,
2787680a9cb8SJohn Marino (char *) NULL);
2788*09d4459fSDaniel Fojt puts (_("Written by Mike Haertel and others; see\n"
2789*09d4459fSDaniel Fojt "<https://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>."));
2790dc7c36e4SJohn Marino return EXIT_SUCCESS;
2791680a9cb8SJohn Marino }
2792680a9cb8SJohn Marino
2793680a9cb8SJohn Marino if (show_help)
2794680a9cb8SJohn Marino usage (EXIT_SUCCESS);
2795680a9cb8SJohn Marino
2796680a9cb8SJohn Marino if (keys)
2797680a9cb8SJohn Marino {
2798680a9cb8SJohn Marino if (keycc == 0)
2799680a9cb8SJohn Marino {
2800680a9cb8SJohn Marino /* No keys were specified (e.g. -f /dev/null). Match nothing. */
2801680a9cb8SJohn Marino out_invert ^= true;
2802dc7c36e4SJohn Marino match_lines = match_words = false;
2803680a9cb8SJohn Marino }
2804680a9cb8SJohn Marino else
2805680a9cb8SJohn Marino /* Strip trailing newline. */
2806680a9cb8SJohn Marino --keycc;
2807680a9cb8SJohn Marino }
2808680a9cb8SJohn Marino else if (optind < argc)
2809680a9cb8SJohn Marino {
2810*09d4459fSDaniel Fojt /* Make a copy so that it can be reallocated or freed later. */
2811680a9cb8SJohn Marino keycc = strlen (argv[optind]);
2812680a9cb8SJohn Marino keys = xmemdup (argv[optind++], keycc + 1);
2813*09d4459fSDaniel Fojt fl_add (keys, keycc, "");
2814*09d4459fSDaniel Fojt n_patterns++;
2815680a9cb8SJohn Marino }
2816680a9cb8SJohn Marino else
2817680a9cb8SJohn Marino usage (EXIT_TROUBLE);
2818680a9cb8SJohn Marino
2819*09d4459fSDaniel Fojt bool possibly_tty = false;
2820*09d4459fSDaniel Fojt struct stat tmp_stat;
2821*09d4459fSDaniel Fojt if (! exit_on_match && fstat (STDOUT_FILENO, &tmp_stat) == 0)
2822680a9cb8SJohn Marino {
2823*09d4459fSDaniel Fojt if (S_ISREG (tmp_stat.st_mode))
2824*09d4459fSDaniel Fojt out_stat = tmp_stat;
2825*09d4459fSDaniel Fojt else if (S_ISCHR (tmp_stat.st_mode))
2826*09d4459fSDaniel Fojt {
2827*09d4459fSDaniel Fojt struct stat null_stat;
2828*09d4459fSDaniel Fojt if (stat ("/dev/null", &null_stat) == 0
2829*09d4459fSDaniel Fojt && SAME_INODE (tmp_stat, null_stat))
2830*09d4459fSDaniel Fojt dev_null_output = true;
2831*09d4459fSDaniel Fojt else
2832*09d4459fSDaniel Fojt possibly_tty = true;
2833*09d4459fSDaniel Fojt }
2834680a9cb8SJohn Marino }
2835680a9cb8SJohn Marino
2836*09d4459fSDaniel Fojt /* POSIX says -c, -l and -q are mutually exclusive. In this
2837*09d4459fSDaniel Fojt implementation, -q overrides -l and -L, which in turn override -c. */
2838*09d4459fSDaniel Fojt if (exit_on_match)
2839*09d4459fSDaniel Fojt list_files = LISTFILES_NONE;
2840*09d4459fSDaniel Fojt if ((exit_on_match | dev_null_output) || list_files != LISTFILES_NONE)
2841*09d4459fSDaniel Fojt {
2842*09d4459fSDaniel Fojt count_matches = false;
2843*09d4459fSDaniel Fojt done_on_match = true;
2844*09d4459fSDaniel Fojt }
2845*09d4459fSDaniel Fojt out_quiet = count_matches | done_on_match;
2846*09d4459fSDaniel Fojt
2847*09d4459fSDaniel Fojt if (out_after < 0)
2848*09d4459fSDaniel Fojt out_after = default_context;
2849*09d4459fSDaniel Fojt if (out_before < 0)
2850*09d4459fSDaniel Fojt out_before = default_context;
2851*09d4459fSDaniel Fojt
2852*09d4459fSDaniel Fojt /* If it is easy to see that matching cannot succeed (e.g., 'grep -f
2853*09d4459fSDaniel Fojt /dev/null'), fail without reading the input. */
2854*09d4459fSDaniel Fojt if ((max_count == 0
2855*09d4459fSDaniel Fojt || (keycc == 0 && out_invert && !match_lines && !match_words))
2856*09d4459fSDaniel Fojt && list_files != LISTFILES_NONMATCHING)
2857*09d4459fSDaniel Fojt return EXIT_FAILURE;
2858*09d4459fSDaniel Fojt
2859*09d4459fSDaniel Fojt if (color_option == 2)
2860*09d4459fSDaniel Fojt color_option = possibly_tty && should_colorize () && isatty (STDOUT_FILENO);
2861*09d4459fSDaniel Fojt init_colorize ();
2862*09d4459fSDaniel Fojt
2863*09d4459fSDaniel Fojt if (color_option)
2864*09d4459fSDaniel Fojt {
2865*09d4459fSDaniel Fojt /* Legacy. */
2866*09d4459fSDaniel Fojt char *userval = getenv ("GREP_COLOR");
2867*09d4459fSDaniel Fojt if (userval != NULL && *userval != '\0')
2868*09d4459fSDaniel Fojt selected_match_color = context_match_color = userval;
2869*09d4459fSDaniel Fojt
2870*09d4459fSDaniel Fojt /* New GREP_COLORS has priority. */
2871*09d4459fSDaniel Fojt parse_grep_colors ();
2872*09d4459fSDaniel Fojt }
2873*09d4459fSDaniel Fojt
2874*09d4459fSDaniel Fojt initialize_unibyte_mask ();
2875*09d4459fSDaniel Fojt
2876*09d4459fSDaniel Fojt if (matcher < 0)
2877*09d4459fSDaniel Fojt matcher = G_MATCHER_INDEX;
2878*09d4459fSDaniel Fojt
2879*09d4459fSDaniel Fojt /* In a single-byte locale, switch from -F to -G if it is a single
2880*09d4459fSDaniel Fojt pattern that matches words, where -G is typically faster. In a
2881*09d4459fSDaniel Fojt multi-byte locale, switch if the patterns have an encoding error
2882*09d4459fSDaniel Fojt (where -F does not work) or if -i and the patterns will not work
2883*09d4459fSDaniel Fojt for -iF. */
2884*09d4459fSDaniel Fojt if (matcher == F_MATCHER_INDEX
2885*09d4459fSDaniel Fojt && (! localeinfo.multibyte
2886*09d4459fSDaniel Fojt ? n_patterns == 1 && match_words
2887*09d4459fSDaniel Fojt : (contains_encoding_error (keys, keycc)
2888*09d4459fSDaniel Fojt || (match_icase && !fgrep_icase_available (keys, keycc)))))
2889*09d4459fSDaniel Fojt {
2890*09d4459fSDaniel Fojt fgrep_to_grep_pattern (&keys, &keycc);
2891*09d4459fSDaniel Fojt matcher = G_MATCHER_INDEX;
2892*09d4459fSDaniel Fojt }
2893*09d4459fSDaniel Fojt /* With two or more patterns, if -F works then switch from either -E
2894*09d4459fSDaniel Fojt or -G, as -F is probably faster then. */
2895*09d4459fSDaniel Fojt else if ((matcher == G_MATCHER_INDEX || matcher == E_MATCHER_INDEX)
2896*09d4459fSDaniel Fojt && 1 < n_patterns)
2897*09d4459fSDaniel Fojt matcher = try_fgrep_pattern (matcher, keys, &keycc);
2898*09d4459fSDaniel Fojt
2899*09d4459fSDaniel Fojt execute = matchers[matcher].execute;
2900*09d4459fSDaniel Fojt compiled_pattern = matchers[matcher].compile (keys, keycc,
2901*09d4459fSDaniel Fojt matchers[matcher].syntax);
2902dc7c36e4SJohn Marino /* We need one byte prior and one after. */
2903dc7c36e4SJohn Marino char eolbytes[3] = { 0, eolbyte, 0 };
2904dc7c36e4SJohn Marino size_t match_size;
2905*09d4459fSDaniel Fojt skip_empty_lines = ((execute (compiled_pattern, eolbytes + 1, 1,
2906*09d4459fSDaniel Fojt &match_size, NULL) == 0)
2907dc7c36e4SJohn Marino == out_invert);
2908680a9cb8SJohn Marino
2909*09d4459fSDaniel Fojt int num_operands = argc - optind;
2910*09d4459fSDaniel Fojt out_file = (filename_option == 0 && num_operands <= 1
2911*09d4459fSDaniel Fojt ? - (directories == RECURSE_DIRECTORIES)
2912*09d4459fSDaniel Fojt : 0 <= filename_option);
2913680a9cb8SJohn Marino
2914*09d4459fSDaniel Fojt if (binary)
2915*09d4459fSDaniel Fojt xset_binary_mode (STDOUT_FILENO, O_BINARY);
2916*09d4459fSDaniel Fojt
2917*09d4459fSDaniel Fojt /* Prefer sysconf for page size, as getpagesize typically returns int. */
2918*09d4459fSDaniel Fojt #ifdef _SC_PAGESIZE
2919*09d4459fSDaniel Fojt long psize = sysconf (_SC_PAGESIZE);
2920*09d4459fSDaniel Fojt #else
2921*09d4459fSDaniel Fojt long psize = getpagesize ();
2922680a9cb8SJohn Marino #endif
2923*09d4459fSDaniel Fojt if (! (0 < psize && psize <= (SIZE_MAX - sizeof (uword)) / 2))
2924*09d4459fSDaniel Fojt abort ();
2925*09d4459fSDaniel Fojt pagesize = psize;
2926*09d4459fSDaniel Fojt bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + sizeof (uword);
2927*09d4459fSDaniel Fojt buffer = xmalloc (bufalloc);
2928680a9cb8SJohn Marino
2929680a9cb8SJohn Marino if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
2930680a9cb8SJohn Marino devices = READ_DEVICES;
2931680a9cb8SJohn Marino
2932dc7c36e4SJohn Marino char *const *files;
2933*09d4459fSDaniel Fojt if (0 < num_operands)
2934680a9cb8SJohn Marino {
2935dc7c36e4SJohn Marino files = argv + optind;
2936680a9cb8SJohn Marino }
2937680a9cb8SJohn Marino else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive)
2938680a9cb8SJohn Marino {
2939dc7c36e4SJohn Marino static char *const cwd_only[] = { (char *) ".", NULL };
2940dc7c36e4SJohn Marino files = cwd_only;
2941dc7c36e4SJohn Marino omit_dot_slash = true;
2942680a9cb8SJohn Marino }
2943680a9cb8SJohn Marino else
2944dc7c36e4SJohn Marino {
2945dc7c36e4SJohn Marino static char *const stdin_only[] = { (char *) "-", NULL };
2946dc7c36e4SJohn Marino files = stdin_only;
2947dc7c36e4SJohn Marino }
2948dc7c36e4SJohn Marino
2949dc7c36e4SJohn Marino bool status = true;
2950dc7c36e4SJohn Marino do
2951dc7c36e4SJohn Marino status &= grep_command_line_arg (*files++);
2952dc7c36e4SJohn Marino while (*files != NULL);
2953680a9cb8SJohn Marino
2954*09d4459fSDaniel Fojt /* We register via atexit to test stdout. */
2955dc7c36e4SJohn Marino return errseen ? EXIT_TROUBLE : status;
2956680a9cb8SJohn Marino }
2957