1 /* exclude.c -- exclude file names
2
3 Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2020 Free Software
4 Foundation, Inc.
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19 /* Written by Paul Eggert <eggert@twinsun.com>
20 and Sergey Poznyakoff <gray@gnu.org>.
21 Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
22 for improvement suggestions. */
23
24 #include <config.h>
25
26 #include <stdbool.h>
27
28 #include <ctype.h>
29 #include <errno.h>
30 #include <stddef.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <wctype.h>
35 #include <regex.h>
36
37 #include "exclude.h"
38 #include "hash.h"
39 #include "mbuiter.h"
40 #include "fnmatch.h"
41 #include "xalloc.h"
42 #include "verify.h"
43 #include "filename.h"
44
45 #if USE_UNLOCKED_IO
46 # include "unlocked-io.h"
47 #endif
48
49 /* Non-GNU systems lack these options, so we don't need to check them. */
50 #ifndef FNM_CASEFOLD
51 # define FNM_CASEFOLD 0
52 #endif
53 #ifndef FNM_EXTMATCH
54 # define FNM_EXTMATCH 0
55 #endif
56 #ifndef FNM_LEADING_DIR
57 # define FNM_LEADING_DIR 0
58 #endif
59
60 verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
61 & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
62 | FNM_CASEFOLD | FNM_EXTMATCH))
63 == 0);
64
65
66 /* Exclusion patterns are grouped into a singly-linked list of
67 "exclusion segments". Each segment represents a set of patterns
68 that can be matches using the same algorithm. Non-wildcard
69 patterns are kept in hash tables, to speed up searches. Wildcard
70 patterns are stored as arrays of patterns. */
71
72
73 /* An exclude pattern-options pair. The options are fnmatch options
74 ORed with EXCLUDE_* options. */
75
76 struct patopts
77 {
78 int options;
79 union
80 {
81 char const *pattern;
82 regex_t re;
83 } v;
84 };
85
86 /* An array of pattern-options pairs. */
87
88 struct exclude_pattern
89 {
90 struct patopts *exclude;
91 size_t exclude_alloc;
92 size_t exclude_count;
93 };
94
95 enum exclude_type
96 {
97 exclude_hash, /* a hash table of excluded names */
98 exclude_pattern /* an array of exclude patterns */
99 };
100
101 struct exclude_segment
102 {
103 struct exclude_segment *next; /* next segment in list */
104 enum exclude_type type; /* type of this segment */
105 int options; /* common options for this segment */
106 union
107 {
108 Hash_table *table; /* for type == exclude_hash */
109 struct exclude_pattern pat; /* for type == exclude_pattern */
110 } v;
111 };
112
113 struct pattern_buffer
114 {
115 struct pattern_buffer *next;
116 char *base;
117 };
118
119 /* The exclude structure keeps a singly-linked list of exclude segments,
120 maintained in reverse order. */
121 struct exclude
122 {
123 struct exclude_segment *head;
124 struct pattern_buffer *patbuf;
125 };
126
127 /* Register BUF in the pattern buffer list of EX. ADD_FUNC (see
128 add_exclude_file and add_exclude_fp below) can use this function
129 if it modifies the pattern, to ensure the allocated memory will be
130 properly reclaimed upon calling free_exclude. */
131 void
exclude_add_pattern_buffer(struct exclude * ex,char * buf)132 exclude_add_pattern_buffer (struct exclude *ex, char *buf)
133 {
134 struct pattern_buffer *pbuf = xmalloc (sizeof *pbuf);
135 pbuf->base = buf;
136 pbuf->next = ex->patbuf;
137 ex->patbuf = pbuf;
138 }
139
140 /* Return true if STR has or may have wildcards, when matched with OPTIONS.
141 Return false if STR definitely does not have wildcards. */
142 bool
fnmatch_pattern_has_wildcards(const char * str,int options)143 fnmatch_pattern_has_wildcards (const char *str, int options)
144 {
145 while (1)
146 {
147 switch (*str++)
148 {
149 case '.':
150 case '{':
151 case '}':
152 case '(':
153 case ')':
154 if (options & EXCLUDE_REGEX)
155 return true;
156 break;
157
158 case '\\':
159 if (options & EXCLUDE_REGEX)
160 continue;
161 else
162 str += ! (options & FNM_NOESCAPE) && *str;
163 break;
164
165 case '+': case '@': case '!':
166 if (options & FNM_EXTMATCH && *str == '(')
167 return true;
168 break;
169
170 case '?': case '*': case '[':
171 return true;
172
173 case '\0':
174 return false;
175 }
176 }
177 }
178
179 static void
unescape_pattern(char * str)180 unescape_pattern (char *str)
181 {
182 char const *q = str;
183 do
184 q += *q == '\\' && q[1];
185 while ((*str++ = *q++));
186 }
187
188 /* Return a newly allocated and empty exclude list. */
189
190 struct exclude *
new_exclude(void)191 new_exclude (void)
192 {
193 return xzalloc (sizeof *new_exclude ());
194 }
195
196 /* Calculate the hash of string. */
197 static size_t
string_hasher(void const * data,size_t n_buckets)198 string_hasher (void const *data, size_t n_buckets)
199 {
200 char const *p = data;
201 return hash_string (p, n_buckets);
202 }
203
204 /* Ditto, for case-insensitive hashes */
205 static size_t
string_hasher_ci(void const * data,size_t n_buckets)206 string_hasher_ci (void const *data, size_t n_buckets)
207 {
208 char const *p = data;
209 mbui_iterator_t iter;
210 size_t value = 0;
211
212 for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
213 {
214 mbchar_t m = mbui_cur (iter);
215 wchar_t wc;
216
217 if (m.wc_valid)
218 wc = towlower (m.wc);
219 else
220 wc = *m.ptr;
221
222 value = (value * 31 + wc) % n_buckets;
223 }
224
225 return value;
226 }
227
228 /* compare two strings for equality */
229 static bool
string_compare(void const * data1,void const * data2)230 string_compare (void const *data1, void const *data2)
231 {
232 char const *p1 = data1;
233 char const *p2 = data2;
234 return strcmp (p1, p2) == 0;
235 }
236
237 /* compare two strings for equality, case-insensitive */
238 static bool
string_compare_ci(void const * data1,void const * data2)239 string_compare_ci (void const *data1, void const *data2)
240 {
241 char const *p1 = data1;
242 char const *p2 = data2;
243 return mbscasecmp (p1, p2) == 0;
244 }
245
246 static void
string_free(void * data)247 string_free (void *data)
248 {
249 free (data);
250 }
251
252 /* Create new exclude segment of given TYPE and OPTIONS, and attach it
253 to the head of EX. */
254 static void
new_exclude_segment(struct exclude * ex,enum exclude_type type,int options)255 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
256 {
257 struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment));
258 sp->type = type;
259 sp->options = options;
260 switch (type)
261 {
262 case exclude_pattern:
263 break;
264
265 case exclude_hash:
266 sp->v.table = hash_initialize (0, NULL,
267 (options & FNM_CASEFOLD) ?
268 string_hasher_ci
269 : string_hasher,
270 (options & FNM_CASEFOLD) ?
271 string_compare_ci
272 : string_compare,
273 string_free);
274 break;
275 }
276 sp->next = ex->head;
277 ex->head = sp;
278 }
279
280 /* Free a single exclude segment */
281 static void
free_exclude_segment(struct exclude_segment * seg)282 free_exclude_segment (struct exclude_segment *seg)
283 {
284 size_t i;
285
286 switch (seg->type)
287 {
288 case exclude_pattern:
289 for (i = 0; i < seg->v.pat.exclude_count; i++)
290 {
291 if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX)
292 regfree (&seg->v.pat.exclude[i].v.re);
293 }
294 free (seg->v.pat.exclude);
295 break;
296
297 case exclude_hash:
298 hash_free (seg->v.table);
299 break;
300 }
301 free (seg);
302 }
303
304 /* Free the storage associated with an exclude list. */
305 void
free_exclude(struct exclude * ex)306 free_exclude (struct exclude *ex)
307 {
308 struct exclude_segment *seg;
309 struct pattern_buffer *pbuf;
310
311 for (seg = ex->head; seg; )
312 {
313 struct exclude_segment *next = seg->next;
314 free_exclude_segment (seg);
315 seg = next;
316 }
317
318 for (pbuf = ex->patbuf; pbuf; )
319 {
320 struct pattern_buffer *next = pbuf->next;
321 free (pbuf->base);
322 free (pbuf);
323 pbuf = next;
324 }
325
326 free (ex);
327 }
328
329 /* Return zero if PATTERN matches F, obeying OPTIONS, except that
330 (unlike fnmatch) wildcards are disabled in PATTERN. */
331
332 static int
fnmatch_no_wildcards(char const * pattern,char const * f,int options)333 fnmatch_no_wildcards (char const *pattern, char const *f, int options)
334 {
335 if (! (options & FNM_LEADING_DIR))
336 return ((options & FNM_CASEFOLD)
337 ? mbscasecmp (pattern, f)
338 : strcmp (pattern, f));
339 else if (! (options & FNM_CASEFOLD))
340 {
341 size_t patlen = strlen (pattern);
342 int r = strncmp (pattern, f, patlen);
343 if (! r)
344 {
345 r = f[patlen];
346 if (r == '/')
347 r = 0;
348 }
349 return r;
350 }
351 else
352 {
353 /* Walk through a copy of F, seeing whether P matches any prefix
354 of F.
355
356 FIXME: This is an O(N**2) algorithm; it should be O(N).
357 Also, the copy should not be necessary. However, fixing this
358 will probably involve a change to the mbs* API. */
359
360 char *fcopy = xstrdup (f);
361 char *p;
362 int r;
363 for (p = fcopy; ; *p++ = '/')
364 {
365 p = strchr (p, '/');
366 if (p)
367 *p = '\0';
368 r = mbscasecmp (pattern, fcopy);
369 if (!p || r <= 0)
370 break;
371 }
372 free (fcopy);
373 return r;
374 }
375 }
376
377 bool
exclude_fnmatch(char const * pattern,char const * f,int options)378 exclude_fnmatch (char const *pattern, char const *f, int options)
379 {
380 int (*matcher) (char const *, char const *, int) =
381 (options & EXCLUDE_WILDCARDS
382 ? fnmatch
383 : fnmatch_no_wildcards);
384 bool matched = ((*matcher) (pattern, f, options) == 0);
385 char const *p;
386
387 if (! (options & EXCLUDE_ANCHORED))
388 for (p = f; *p && ! matched; p++)
389 if (*p == '/' && p[1] != '/')
390 matched = ((*matcher) (pattern, p + 1, options) == 0);
391
392 return matched;
393 }
394
395 static bool
exclude_patopts(struct patopts const * opts,char const * f)396 exclude_patopts (struct patopts const *opts, char const *f)
397 {
398 int options = opts->options;
399
400 return (options & EXCLUDE_REGEX)
401 ? regexec (&opts->v.re, f, 0, NULL, 0) == 0
402 : exclude_fnmatch (opts->v.pattern, f, options);
403 }
404
405 /* Return true if the exclude_pattern segment SEG matches F. */
406
407 static bool
file_pattern_matches(struct exclude_segment const * seg,char const * f)408 file_pattern_matches (struct exclude_segment const *seg, char const *f)
409 {
410 size_t exclude_count = seg->v.pat.exclude_count;
411 struct patopts const *exclude = seg->v.pat.exclude;
412 size_t i;
413
414 for (i = 0; i < exclude_count; i++)
415 {
416 if (exclude_patopts (exclude + i, f))
417 return true;
418 }
419 return false;
420 }
421
422 /* Return true if the exclude_hash segment SEG matches F.
423 BUFFER is an auxiliary storage of the same length as F (with nul
424 terminator included) */
425 static bool
file_name_matches(struct exclude_segment const * seg,char const * f,char * buffer)426 file_name_matches (struct exclude_segment const *seg, char const *f,
427 char *buffer)
428 {
429 int options = seg->options;
430 Hash_table *table = seg->v.table;
431
432 do
433 {
434 /* initialize the pattern */
435 strcpy (buffer, f);
436
437 while (1)
438 {
439 if (hash_lookup (table, buffer))
440 return true;
441 if (options & FNM_LEADING_DIR)
442 {
443 char *p = strrchr (buffer, '/');
444 if (p)
445 {
446 *p = 0;
447 continue;
448 }
449 }
450 break;
451 }
452
453 if (!(options & EXCLUDE_ANCHORED))
454 {
455 f = strchr (f, '/');
456 if (f)
457 f++;
458 }
459 else
460 break;
461 }
462 while (f);
463
464 return false;
465 }
466
467 /* Return true if EX excludes F. */
468
469 bool
excluded_file_name(struct exclude const * ex,char const * f)470 excluded_file_name (struct exclude const *ex, char const *f)
471 {
472 struct exclude_segment *seg;
473 bool invert = false;
474 char *filename = NULL;
475
476 /* If no patterns are given, the default is to include. */
477 if (!ex->head)
478 return false;
479
480 /* Scan through the segments, reporting the status of the first match.
481 The segments are in reverse order, so this reports the status of
482 the last match in the original option list. */
483 for (seg = ex->head; ; seg = seg->next)
484 {
485 if (seg->type == exclude_hash)
486 {
487 if (!filename)
488 filename = xmalloc (strlen (f) + 1);
489 if (file_name_matches (seg, f, filename))
490 break;
491 }
492 else
493 {
494 if (file_pattern_matches (seg, f))
495 break;
496 }
497
498 if (! seg->next)
499 {
500 /* If patterns are given but none match, the default is the
501 opposite of the last segment (i.e., the first in the
502 original option list). For example, in the command
503 'grep -r --exclude="a*" --include="*b" pat dir', the
504 first option is --exclude so any file name matching
505 neither a* nor *b is included. */
506 invert = true;
507 break;
508 }
509 }
510
511 free (filename);
512 return invert ^ ! (seg->options & EXCLUDE_INCLUDE);
513 }
514
515 /* Append to EX the exclusion PATTERN with OPTIONS. */
516
517 void
add_exclude(struct exclude * ex,char const * pattern,int options)518 add_exclude (struct exclude *ex, char const *pattern, int options)
519 {
520 struct exclude_segment *seg;
521 struct exclude_pattern *pat;
522 struct patopts *patopts;
523
524 if ((options & (EXCLUDE_REGEX|EXCLUDE_WILDCARDS))
525 && fnmatch_pattern_has_wildcards (pattern, options))
526 {
527 if (! (ex->head && ex->head->type == exclude_pattern
528 && ((ex->head->options & EXCLUDE_INCLUDE)
529 == (options & EXCLUDE_INCLUDE))))
530 new_exclude_segment (ex, exclude_pattern, options);
531
532 seg = ex->head;
533
534 pat = &seg->v.pat;
535 if (pat->exclude_count == pat->exclude_alloc)
536 pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc,
537 sizeof *pat->exclude);
538 patopts = &pat->exclude[pat->exclude_count++];
539
540 patopts->options = options;
541 if (options & EXCLUDE_REGEX)
542 {
543 int rc;
544 int cflags = REG_NOSUB|REG_EXTENDED|
545 ((options & FNM_CASEFOLD) ? REG_ICASE : 0);
546
547 if (options & FNM_LEADING_DIR)
548 {
549 char *tmp;
550 size_t len = strlen (pattern);
551
552 while (len > 0 && ISSLASH (pattern[len-1]))
553 --len;
554
555 if (len == 0)
556 rc = 1;
557 else
558 {
559 tmp = xmalloc (len + 7);
560 memcpy (tmp, pattern, len);
561 strcpy (tmp + len, "(/.*)?");
562 rc = regcomp (&patopts->v.re, tmp, cflags);
563 free (tmp);
564 }
565 }
566 else
567 rc = regcomp (&patopts->v.re, pattern, cflags);
568
569 if (rc)
570 {
571 pat->exclude_count--;
572 return;
573 }
574 }
575 else
576 {
577 if (options & EXCLUDE_ALLOC)
578 {
579 pattern = xstrdup (pattern);
580 exclude_add_pattern_buffer (ex, (char*) pattern);
581 }
582 patopts->v.pattern = pattern;
583 }
584 }
585 else
586 {
587 char *str, *p;
588 int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED
589 | FNM_LEADING_DIR | FNM_CASEFOLD);
590 if (! (ex->head && ex->head->type == exclude_hash
591 && ((ex->head->options & exclude_hash_flags)
592 == (options & exclude_hash_flags))))
593 new_exclude_segment (ex, exclude_hash, options);
594 seg = ex->head;
595
596 str = xstrdup (pattern);
597 if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS)
598 unescape_pattern (str);
599 p = hash_insert (seg->v.table, str);
600 if (p != str)
601 free (str);
602 }
603 }
604
605 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
606 OPTIONS. LINE_END terminates each pattern in the file. If
607 LINE_END is a space character, ignore trailing spaces and empty
608 lines in FP. Return -1 on failure, 0 on success. */
609
610 int
add_exclude_fp(void (* add_func)(struct exclude *,char const *,int,void *),struct exclude * ex,FILE * fp,int options,char line_end,void * data)611 add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *),
612 struct exclude *ex, FILE *fp, int options,
613 char line_end,
614 void *data)
615 {
616 char *buf = NULL;
617 char *p;
618 char *pattern;
619 char const *lim;
620 size_t buf_alloc = 0;
621 size_t buf_count = 0;
622 int c;
623 int e = 0;
624
625 while ((c = getc (fp)) != EOF)
626 {
627 if (buf_count == buf_alloc)
628 buf = x2realloc (buf, &buf_alloc);
629 buf[buf_count++] = c;
630 }
631
632 if (ferror (fp))
633 e = errno;
634
635 buf = xrealloc (buf, buf_count + 1);
636 buf[buf_count] = line_end;
637 lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
638
639 exclude_add_pattern_buffer (ex, buf);
640
641 pattern = buf;
642
643 for (p = buf; p < lim; p++)
644 if (*p == line_end)
645 {
646 char *pattern_end = p;
647
648 if (isspace ((unsigned char) line_end))
649 {
650 for (; ; pattern_end--)
651 if (pattern_end == pattern)
652 goto next_pattern;
653 else if (! isspace ((unsigned char) pattern_end[-1]))
654 break;
655 }
656
657 *pattern_end = '\0';
658 (*add_func) (ex, pattern, options, data);
659
660 next_pattern:
661 pattern = p + 1;
662 }
663
664 errno = e;
665 return e ? -1 : 0;
666 }
667
668 static void
call_addfn(struct exclude * ex,char const * pattern,int options,void * data)669 call_addfn (struct exclude *ex, char const *pattern, int options, void *data)
670 {
671 void (**addfnptr) (struct exclude *, char const *, int) = data;
672 (*addfnptr) (ex, pattern, options);
673 }
674
675 int
add_exclude_file(void (* add_func)(struct exclude *,char const *,int),struct exclude * ex,char const * file_name,int options,char line_end)676 add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
677 struct exclude *ex, char const *file_name, int options,
678 char line_end)
679 {
680 bool use_stdin = file_name[0] == '-' && !file_name[1];
681 FILE *in;
682 int rc = 0;
683
684 if (use_stdin)
685 in = stdin;
686 else if (! (in = fopen (file_name, "r")))
687 return -1;
688
689 rc = add_exclude_fp (call_addfn, ex, in, options, line_end, &add_func);
690
691 if (!use_stdin && fclose (in) != 0)
692 rc = -1;
693
694 return rc;
695 }
696