xref: /dflybsd-src/contrib/diffutils/lib/exclude.c (revision 8368e25f758b85c2863fe95506b40c66b86379d5)
1 /* exclude.c -- exclude file names
2 
3    Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2018 Free Software
4    Foundation, Inc.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 /* Written by Paul Eggert <eggert@twinsun.com>
20    and Sergey Poznyakoff <gray@gnu.org>.
21    Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
22    for improvement suggestions. */
23 
24 #include <config.h>
25 
26 #include <stdbool.h>
27 
28 #include <ctype.h>
29 #include <errno.h>
30 #include <stddef.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <wctype.h>
35 #include <regex.h>
36 
37 #include "exclude.h"
38 #include "hash.h"
39 #include "mbuiter.h"
40 #include "fnmatch.h"
41 #include "xalloc.h"
42 #include "verify.h"
43 #include "filename.h"
44 
45 #if USE_UNLOCKED_IO
46 # include "unlocked-io.h"
47 #endif
48 
49 /* Non-GNU systems lack these options, so we don't need to check them.  */
50 #ifndef FNM_CASEFOLD
51 # define FNM_CASEFOLD 0
52 #endif
53 #ifndef FNM_EXTMATCH
54 # define FNM_EXTMATCH 0
55 #endif
56 #ifndef FNM_LEADING_DIR
57 # define FNM_LEADING_DIR 0
58 #endif
59 
60 verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
61          & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
62             | FNM_CASEFOLD | FNM_EXTMATCH))
63         == 0);
64 
65 
66 /* Exclusion patterns are grouped into a singly-linked list of
67    "exclusion segments".  Each segment represents a set of patterns
68    that can be matches using the same algorithm.  Non-wildcard
69    patterns are kept in hash tables, to speed up searches.  Wildcard
70    patterns are stored as arrays of patterns. */
71 
72 
73 /* An exclude pattern-options pair.  The options are fnmatch options
74    ORed with EXCLUDE_* options.  */
75 
76 struct patopts
77   {
78     int options;
79     union
80     {
81       char const *pattern;
82       regex_t re;
83     } v;
84   };
85 
86 /* An array of pattern-options pairs.  */
87 
88 struct exclude_pattern
89   {
90     struct patopts *exclude;
91     size_t exclude_alloc;
92     size_t exclude_count;
93   };
94 
95 enum exclude_type
96   {
97     exclude_hash,                    /* a hash table of excluded names */
98     exclude_pattern                  /* an array of exclude patterns */
99   };
100 
101 struct exclude_segment
102   {
103     struct exclude_segment *next;    /* next segment in list */
104     enum exclude_type type;          /* type of this segment */
105     int options;                     /* common options for this segment */
106     union
107     {
108       Hash_table *table;             /* for type == exclude_hash */
109       struct exclude_pattern pat;    /* for type == exclude_pattern */
110     } v;
111   };
112 
113 struct pattern_buffer
114   {
115     struct pattern_buffer *next;
116     char *base;
117   };
118 
119 /* The exclude structure keeps a singly-linked list of exclude segments,
120    maintained in reverse order.  */
121 struct exclude
122   {
123     struct exclude_segment *head;
124     struct pattern_buffer *patbuf;
125   };
126 
127 /* Register BUF in the pattern buffer list of EX.  ADD_FUNC (see
128    add_exclude_file and add_exclude_fp below) can use this function
129    if it modifies the pattern, to ensure the allocated memory will be
130    properly reclaimed upon calling free_exclude. */
131 void
132 exclude_add_pattern_buffer (struct exclude *ex, char *buf)
133 {
134   struct pattern_buffer *pbuf = xmalloc (sizeof *pbuf);
135   pbuf->base = buf;
136   pbuf->next = ex->patbuf;
137   ex->patbuf = pbuf;
138 }
139 
140 /* Return true if STR has or may have wildcards, when matched with OPTIONS.
141    Return false if STR definitely does not have wildcards.  */
142 bool
143 fnmatch_pattern_has_wildcards (const char *str, int options)
144 {
145   while (1)
146     {
147       switch (*str++)
148         {
149         case '.':
150         case '{':
151         case '}':
152         case '(':
153         case ')':
154           if (options & EXCLUDE_REGEX)
155             return true;
156           break;
157 
158         case '\\':
159           if (options & EXCLUDE_REGEX)
160             continue;
161           else
162             str += ! (options & FNM_NOESCAPE) && *str;
163           break;
164 
165         case '+': case '@': case '!':
166           if (options & FNM_EXTMATCH && *str == '(')
167             return true;
168           break;
169 
170         case '?': case '*': case '[':
171           return true;
172 
173         case '\0':
174           return false;
175         }
176     }
177 }
178 
179 static void
180 unescape_pattern (char *str)
181 {
182   char const *q = str;
183   do
184     q += *q == '\\' && q[1];
185   while ((*str++ = *q++));
186 }
187 
188 /* Return a newly allocated and empty exclude list.  */
189 
190 struct exclude *
191 new_exclude (void)
192 {
193   return xzalloc (sizeof *new_exclude ());
194 }
195 
196 /* Calculate the hash of string.  */
197 static size_t
198 string_hasher (void const *data, size_t n_buckets)
199 {
200   char const *p = data;
201   return hash_string (p, n_buckets);
202 }
203 
204 /* Ditto, for case-insensitive hashes */
205 static size_t
206 string_hasher_ci (void const *data, size_t n_buckets)
207 {
208   char const *p = data;
209   mbui_iterator_t iter;
210   size_t value = 0;
211 
212   for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
213     {
214       mbchar_t m = mbui_cur (iter);
215       wchar_t wc;
216 
217       if (m.wc_valid)
218         wc = towlower (m.wc);
219       else
220         wc = *m.ptr;
221 
222       value = (value * 31 + wc) % n_buckets;
223     }
224 
225   return value;
226 }
227 
228 /* compare two strings for equality */
229 static bool
230 string_compare (void const *data1, void const *data2)
231 {
232   char const *p1 = data1;
233   char const *p2 = data2;
234   return strcmp (p1, p2) == 0;
235 }
236 
237 /* compare two strings for equality, case-insensitive */
238 static bool
239 string_compare_ci (void const *data1, void const *data2)
240 {
241   char const *p1 = data1;
242   char const *p2 = data2;
243   return mbscasecmp (p1, p2) == 0;
244 }
245 
246 static void
247 string_free (void *data)
248 {
249   free (data);
250 }
251 
252 /* Create new exclude segment of given TYPE and OPTIONS, and attach it
253    to the head of EX.  */
254 static void
255 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
256 {
257   struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment));
258   sp->type = type;
259   sp->options = options;
260   switch (type)
261     {
262     case exclude_pattern:
263       break;
264 
265     case exclude_hash:
266       sp->v.table = hash_initialize (0, NULL,
267                                      (options & FNM_CASEFOLD) ?
268                                        string_hasher_ci
269                                        : string_hasher,
270                                      (options & FNM_CASEFOLD) ?
271                                        string_compare_ci
272                                        : string_compare,
273                                      string_free);
274       break;
275     }
276   sp->next = ex->head;
277   ex->head = sp;
278 }
279 
280 /* Free a single exclude segment */
281 static void
282 free_exclude_segment (struct exclude_segment *seg)
283 {
284   size_t i;
285 
286   switch (seg->type)
287     {
288     case exclude_pattern:
289       for (i = 0; i < seg->v.pat.exclude_count; i++)
290         {
291           if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX)
292             regfree (&seg->v.pat.exclude[i].v.re);
293         }
294       free (seg->v.pat.exclude);
295       break;
296 
297     case exclude_hash:
298       hash_free (seg->v.table);
299       break;
300     }
301   free (seg);
302 }
303 
304 /* Free the storage associated with an exclude list.  */
305 void
306 free_exclude (struct exclude *ex)
307 {
308   struct exclude_segment *seg;
309   struct pattern_buffer *pbuf;
310 
311   for (seg = ex->head; seg; )
312     {
313       struct exclude_segment *next = seg->next;
314       free_exclude_segment (seg);
315       seg = next;
316     }
317 
318   for (pbuf = ex->patbuf; pbuf; )
319     {
320       struct pattern_buffer *next = pbuf->next;
321       free (pbuf->base);
322       free (pbuf);
323       pbuf = next;
324     }
325 
326   free (ex);
327 }
328 
329 /* Return zero if PATTERN matches F, obeying OPTIONS, except that
330    (unlike fnmatch) wildcards are disabled in PATTERN.  */
331 
332 static int
333 fnmatch_no_wildcards (char const *pattern, char const *f, int options)
334 {
335   if (! (options & FNM_LEADING_DIR))
336     return ((options & FNM_CASEFOLD)
337             ? mbscasecmp (pattern, f)
338             : strcmp (pattern, f));
339   else if (! (options & FNM_CASEFOLD))
340     {
341       size_t patlen = strlen (pattern);
342       int r = strncmp (pattern, f, patlen);
343       if (! r)
344         {
345           r = f[patlen];
346           if (r == '/')
347             r = 0;
348         }
349       return r;
350     }
351   else
352     {
353       /* Walk through a copy of F, seeing whether P matches any prefix
354          of F.
355 
356          FIXME: This is an O(N**2) algorithm; it should be O(N).
357          Also, the copy should not be necessary.  However, fixing this
358          will probably involve a change to the mbs* API.  */
359 
360       char *fcopy = xstrdup (f);
361       char *p;
362       int r;
363       for (p = fcopy; ; *p++ = '/')
364         {
365           p = strchr (p, '/');
366           if (p)
367             *p = '\0';
368           r = mbscasecmp (pattern, fcopy);
369           if (!p || r <= 0)
370             break;
371         }
372       free (fcopy);
373       return r;
374     }
375 }
376 
377 bool
378 exclude_fnmatch (char const *pattern, char const *f, int options)
379 {
380   int (*matcher) (char const *, char const *, int) =
381     (options & EXCLUDE_WILDCARDS
382      ? fnmatch
383      : fnmatch_no_wildcards);
384   bool matched = ((*matcher) (pattern, f, options) == 0);
385   char const *p;
386 
387   if (! (options & EXCLUDE_ANCHORED))
388     for (p = f; *p && ! matched; p++)
389       if (*p == '/' && p[1] != '/')
390         matched = ((*matcher) (pattern, p + 1, options) == 0);
391 
392   return matched;
393 }
394 
395 static bool
396 exclude_patopts (struct patopts const *opts, char const *f)
397 {
398   int options = opts->options;
399 
400   return (options & EXCLUDE_REGEX)
401           ? regexec (&opts->v.re, f, 0, NULL, 0) == 0
402           : exclude_fnmatch (opts->v.pattern, f, options);
403 }
404 
405 /* Return true if the exclude_pattern segment SEG matches F.  */
406 
407 static bool
408 file_pattern_matches (struct exclude_segment const *seg, char const *f)
409 {
410   size_t exclude_count = seg->v.pat.exclude_count;
411   struct patopts const *exclude = seg->v.pat.exclude;
412   size_t i;
413 
414   for (i = 0; i < exclude_count; i++)
415     {
416       if (exclude_patopts (exclude + i, f))
417         return true;
418     }
419   return false;
420 }
421 
422 /* Return true if the exclude_hash segment SEG matches F.
423    BUFFER is an auxiliary storage of the same length as F (with nul
424    terminator included) */
425 static bool
426 file_name_matches (struct exclude_segment const *seg, char const *f,
427                    char *buffer)
428 {
429   int options = seg->options;
430   Hash_table *table = seg->v.table;
431 
432   do
433     {
434       /* initialize the pattern */
435       strcpy (buffer, f);
436 
437       while (1)
438         {
439           if (hash_lookup (table, buffer))
440             return true;
441           if (options & FNM_LEADING_DIR)
442             {
443               char *p = strrchr (buffer, '/');
444               if (p)
445                 {
446                   *p = 0;
447                   continue;
448                 }
449             }
450           break;
451         }
452 
453       if (!(options & EXCLUDE_ANCHORED))
454         {
455           f = strchr (f, '/');
456           if (f)
457             f++;
458         }
459       else
460         break;
461     }
462   while (f);
463 
464   return false;
465 }
466 
467 /* Return true if EX excludes F.  */
468 
469 bool
470 excluded_file_name (struct exclude const *ex, char const *f)
471 {
472   struct exclude_segment *seg;
473   bool invert = false;
474   char *filename = NULL;
475 
476   /* If no patterns are given, the default is to include.  */
477   if (!ex->head)
478     return false;
479 
480   /* Scan through the segments, reporting the status of the first match.
481      The segments are in reverse order, so this reports the status of
482      the last match in the original option list.  */
483   for (seg = ex->head; ; seg = seg->next)
484     {
485       if (seg->type == exclude_hash)
486         {
487           if (!filename)
488             filename = xmalloc (strlen (f) + 1);
489           if (file_name_matches (seg, f, filename))
490             break;
491         }
492       else
493         {
494           if (file_pattern_matches (seg, f))
495             break;
496         }
497 
498       if (! seg->next)
499         {
500           /* If patterns are given but none match, the default is the
501              opposite of the last segment (i.e., the first in the
502              original option list).  For example, in the command
503              'grep -r --exclude="a*" --include="*b" pat dir', the
504              first option is --exclude so any file name matching
505              neither a* nor *b is included.  */
506           invert = true;
507           break;
508         }
509     }
510 
511   free (filename);
512   return invert ^ ! (seg->options & EXCLUDE_INCLUDE);
513 }
514 
515 /* Append to EX the exclusion PATTERN with OPTIONS.  */
516 
517 void
518 add_exclude (struct exclude *ex, char const *pattern, int options)
519 {
520   struct exclude_segment *seg;
521   struct exclude_pattern *pat;
522   struct patopts *patopts;
523 
524   if ((options & (EXCLUDE_REGEX|EXCLUDE_WILDCARDS))
525       && fnmatch_pattern_has_wildcards (pattern, options))
526     {
527       if (! (ex->head && ex->head->type == exclude_pattern
528              && ((ex->head->options & EXCLUDE_INCLUDE)
529                  == (options & EXCLUDE_INCLUDE))))
530         new_exclude_segment (ex, exclude_pattern, options);
531 
532       seg = ex->head;
533 
534       pat = &seg->v.pat;
535       if (pat->exclude_count == pat->exclude_alloc)
536         pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc,
537                                    sizeof *pat->exclude);
538       patopts = &pat->exclude[pat->exclude_count++];
539 
540       patopts->options = options;
541       if (options & EXCLUDE_REGEX)
542         {
543           int rc;
544           int cflags = REG_NOSUB|REG_EXTENDED|
545                        ((options & FNM_CASEFOLD) ? REG_ICASE : 0);
546 
547           if (options & FNM_LEADING_DIR)
548             {
549               char *tmp;
550               size_t len = strlen (pattern);
551 
552               while (len > 0 && ISSLASH (pattern[len-1]))
553                 --len;
554 
555               if (len == 0)
556                 rc = 1;
557               else
558                 {
559                   tmp = xmalloc (len + 7);
560                   memcpy (tmp, pattern, len);
561                   strcpy (tmp + len, "(/.*)?");
562                   rc = regcomp (&patopts->v.re, tmp, cflags);
563                   free (tmp);
564                 }
565             }
566           else
567             rc = regcomp (&patopts->v.re, pattern, cflags);
568 
569           if (rc)
570             {
571               pat->exclude_count--;
572               return;
573             }
574         }
575       else
576         {
577           if (options & EXCLUDE_ALLOC)
578             {
579               pattern = xstrdup (pattern);
580               exclude_add_pattern_buffer (ex, (char*) pattern);
581             }
582           patopts->v.pattern = pattern;
583         }
584     }
585   else
586     {
587       char *str, *p;
588       int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED
589                                 | FNM_LEADING_DIR | FNM_CASEFOLD);
590       if (! (ex->head && ex->head->type == exclude_hash
591              && ((ex->head->options & exclude_hash_flags)
592                  == (options & exclude_hash_flags))))
593         new_exclude_segment (ex, exclude_hash, options);
594       seg = ex->head;
595 
596       str = xstrdup (pattern);
597       if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS)
598         unescape_pattern (str);
599       p = hash_insert (seg->v.table, str);
600       if (p != str)
601         free (str);
602     }
603 }
604 
605 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
606    OPTIONS.  LINE_END terminates each pattern in the file.  If
607    LINE_END is a space character, ignore trailing spaces and empty
608    lines in FP.  Return -1 on failure, 0 on success.  */
609 
610 int
611 add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *),
612                 struct exclude *ex, FILE *fp, int options,
613                 char line_end,
614                 void *data)
615 {
616   char *buf = NULL;
617   char *p;
618   char *pattern;
619   char const *lim;
620   size_t buf_alloc = 0;
621   size_t buf_count = 0;
622   int c;
623   int e = 0;
624 
625   while ((c = getc (fp)) != EOF)
626     {
627       if (buf_count == buf_alloc)
628         buf = x2realloc (buf, &buf_alloc);
629       buf[buf_count++] = c;
630     }
631 
632   if (ferror (fp))
633     e = errno;
634 
635   buf = xrealloc (buf, buf_count + 1);
636   buf[buf_count] = line_end;
637   lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
638 
639   exclude_add_pattern_buffer (ex, buf);
640 
641   pattern = buf;
642 
643   for (p = buf; p < lim; p++)
644     if (*p == line_end)
645       {
646         char *pattern_end = p;
647 
648         if (isspace ((unsigned char) line_end))
649           {
650             for (; ; pattern_end--)
651               if (pattern_end == pattern)
652                 goto next_pattern;
653               else if (! isspace ((unsigned char) pattern_end[-1]))
654                 break;
655           }
656 
657         *pattern_end = '\0';
658         (*add_func) (ex, pattern, options, data);
659 
660       next_pattern:
661         pattern = p + 1;
662       }
663 
664   errno = e;
665   return e ? -1 : 0;
666 }
667 
668 static void
669 call_addfn (struct exclude *ex, char const *pattern, int options, void *data)
670 {
671   void (**addfnptr) (struct exclude *, char const *, int) = data;
672   (*addfnptr) (ex, pattern, options);
673 }
674 
675 int
676 add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
677                   struct exclude *ex, char const *file_name, int options,
678                   char line_end)
679 {
680   bool use_stdin = file_name[0] == '-' && !file_name[1];
681   FILE *in;
682   int rc = 0;
683 
684   if (use_stdin)
685     in = stdin;
686   else if (! (in = fopen (file_name, "r")))
687     return -1;
688 
689   rc = add_exclude_fp (call_addfn, ex, in, options, line_end, &add_func);
690 
691   if (!use_stdin && fclose (in) != 0)
692     rc = -1;
693 
694   return rc;
695 }
696