xref: /dflybsd-src/contrib/diffutils/lib/fnmatch_loop.c (revision 20f6ddd0df90767e1eba2d12dfa8e1769be7cec7)
1 /* Copyright (C) 1991-1993, 1996-2006, 2009-2018 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Match STRING against the file name pattern PATTERN, returning zero if
18    it matches, nonzero if not.  */
/* Forward declaration of the extended-pattern helper.  In the part of FCT
   visible below it is called when FNM_EXTMATCH is set in FLAGS and a '?'
   or '*' in the pattern is directly followed by '(': OPT receives that
   leading character and PATTERN points at the '('.  A result other than
   -1 is returned by FCT unchanged; -1 makes FCT carry on and treat the
   character as an ordinary wildcard.  */
19 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
20                 const CHAR *string_end, bool no_leading_period, int flags)
21      internal_function;
/* Forward declaration of the helper FCT uses, under FNM_EXTMATCH, while
   collapsing the run of '?' and '*' that follows a '*'.  It is handed a
   pointer to a '('; when the returned pointer differs from the argument,
   FCT treats the text as a pattern and resumes scanning at the returned
   position.  */
22 static const CHAR *END (const CHAR *patternp) internal_function;
23 
24 static int
25 internal_function
26 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
27      bool no_leading_period, int flags)
28 {
29   register const CHAR *p = pattern, *n = string;
30   register UCHAR c;
31 #ifdef _LIBC
32 # if WIDE_CHAR_VERSION
33   const char *collseq = (const char *)
34     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
35 # else
36   const UCHAR *collseq = (const UCHAR *)
37     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
38 # endif
39 #endif
40 
41   while ((c = *p++) != L_('\0'))
42     {
43       bool new_no_leading_period = false;
44       c = FOLD (c);
45 
46       switch (c)
47         {
48         case L_('?'):
49           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
50             {
51               int res;
52 
53               res = EXT (c, p, n, string_end, no_leading_period,
54                          flags);
55               if (res != -1)
56                 return res;
57             }
58 
59           if (n == string_end)
60             return FNM_NOMATCH;
61           else if (*n == L_('/') && (flags & FNM_FILE_NAME))
62             return FNM_NOMATCH;
63           else if (*n == L_('.') && no_leading_period)
64             return FNM_NOMATCH;
65           break;
66 
67         case L_('\\'):
68           if (!(flags & FNM_NOESCAPE))
69             {
70               c = *p++;
71               if (c == L_('\0'))
72                 /* Trailing \ loses.  */
73                 return FNM_NOMATCH;
74               c = FOLD (c);
75             }
76           if (n == string_end || FOLD ((UCHAR) *n) != c)
77             return FNM_NOMATCH;
78           break;
79 
80         case L_('*'):
81           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
82             {
83               int res;
84 
85               res = EXT (c, p, n, string_end, no_leading_period,
86                          flags);
87               if (res != -1)
88                 return res;
89             }
90 
91           if (n != string_end && *n == L_('.') && no_leading_period)
92             return FNM_NOMATCH;
93 
94           for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
95             {
96               if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
97                 {
98                   const CHAR *endp = END (p);
99                   if (endp != p)
100                     {
101                       /* This is a pattern.  Skip over it.  */
102                       p = endp;
103                       continue;
104                     }
105                 }
106 
107               if (c == L_('?'))
108                 {
109                   /* A ? needs to match one character.  */
110                   if (n == string_end)
111                     /* There isn't another character; no match.  */
112                     return FNM_NOMATCH;
113                   else if (*n == L_('/')
114                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
115                     /* A slash does not match a wildcard under
116                        FNM_FILE_NAME.  */
117                     return FNM_NOMATCH;
118                   else
119                     /* One character of the string is consumed in matching
120                        this ? wildcard, so *??? won't match if there are
121                        less than three characters.  */
122                     ++n;
123                 }
124             }
125 
126           if (c == L_('\0'))
127             /* The wildcard(s) is/are the last element of the pattern.
128                If the name is a file name and contains another slash
129                this means it cannot match, unless the FNM_LEADING_DIR
130                flag is set.  */
131             {
132               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
133 
134               if (flags & FNM_FILE_NAME)
135                 {
136                   if (flags & FNM_LEADING_DIR)
137                     result = 0;
138                   else
139                     {
140                       if (MEMCHR (n, L_('/'), string_end - n) == NULL)
141                         result = 0;
142                     }
143                 }
144 
145               return result;
146             }
147           else
148             {
149               const CHAR *endp;
150 
151               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
152                              string_end - n);
153               if (endp == NULL)
154                 endp = string_end;
155 
156               if (c == L_('[')
157                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
158                       && (c == L_('@') || c == L_('+') || c == L_('!'))
159                       && *p == L_('(')))
160                 {
161                   int flags2 = ((flags & FNM_FILE_NAME)
162                                 ? flags : (flags & ~FNM_PERIOD));
163                   bool no_leading_period2 = no_leading_period;
164 
165                   for (--p; n < endp; ++n, no_leading_period2 = false)
166                     if (FCT (p, n, string_end, no_leading_period2, flags2)
167                         == 0)
168                       return 0;
169                 }
170               else if (c == L_('/') && (flags & FNM_FILE_NAME))
171                 {
172                   while (n < string_end && *n != L_('/'))
173                     ++n;
174                   if (n < string_end && *n == L_('/')
175                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
176                           == 0))
177                     return 0;
178                 }
179               else
180                 {
181                   int flags2 = ((flags & FNM_FILE_NAME)
182                                 ? flags : (flags & ~FNM_PERIOD));
183                   int no_leading_period2 = no_leading_period;
184 
185                   if (c == L_('\\') && !(flags & FNM_NOESCAPE))
186                     c = *p;
187                   c = FOLD (c);
188                   for (--p; n < endp; ++n, no_leading_period2 = false)
189                     if (FOLD ((UCHAR) *n) == c
190                         && (FCT (p, n, string_end, no_leading_period2, flags2)
191                             == 0))
192                       return 0;
193                 }
194             }
195 
196           /* If we come here no match is possible with the wildcard.  */
197           return FNM_NOMATCH;
198 
199         case L_('['):
200           {
201             /* Nonzero if the sense of the character class is inverted.  */
202             const CHAR *p_init = p;
203             const CHAR *n_init = n;
204             register bool not;
205             CHAR cold;
206             UCHAR fn;
207 
208             if (posixly_correct == 0)
209               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
210 
211             if (n == string_end)
212               return FNM_NOMATCH;
213 
214             if (*n == L_('.') && no_leading_period)
215               return FNM_NOMATCH;
216 
217             if (*n == L_('/') && (flags & FNM_FILE_NAME))
218               /* '/' cannot be matched.  */
219               return FNM_NOMATCH;
220 
221             not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
222             if (not)
223               ++p;
224 
225             fn = FOLD ((UCHAR) *n);
226 
227             c = *p++;
228             for (;;)
229               {
230 		bool is_range = false;
231 
232                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
233                   {
234                     if (*p == L_('\0'))
235                       return FNM_NOMATCH;
236                     c = FOLD ((UCHAR) *p);
237                     ++p;
238 
239                     goto normal_bracket;
240                   }
241                 else if (c == L_('[') && *p == L_(':'))
242                   {
243                     /* Leave room for the null.  */
244                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
245                     size_t c1 = 0;
246 #if defined _LIBC || WIDE_CHAR_SUPPORT
247                     wctype_t wt;
248 #endif
249                     const CHAR *startp = p;
250 
251                     for (;;)
252                       {
253                         if (c1 == CHAR_CLASS_MAX_LENGTH)
254                           /* The name is too long and therefore the pattern
255                              is ill-formed.  */
256                           return FNM_NOMATCH;
257 
258                         c = *++p;
259                         if (c == L_(':') && p[1] == L_(']'))
260                           {
261                             p += 2;
262                             break;
263                           }
264                         if (c < L_('a') || c >= L_('z'))
265                           {
266                             /* This cannot possibly be a character class name.
267                                Match it as a normal range.  */
268                             p = startp;
269                             c = L_('[');
270                             goto normal_bracket;
271                           }
272                         str[c1++] = c;
273                       }
274                     str[c1] = L_('\0');
275 
276 #if defined _LIBC || WIDE_CHAR_SUPPORT
277                     wt = IS_CHAR_CLASS (str);
278                     if (wt == 0)
279                       /* Invalid character class name.  */
280                       return FNM_NOMATCH;
281 
282 # if defined _LIBC && ! WIDE_CHAR_VERSION
283                     /* The following code is glibc specific, but there
284                        it does a good job of speeding up the code since
285                        we can avoid the btowc() call.  */
286                     if (_ISCTYPE ((UCHAR) *n, wt))
287                       goto matched;
288 # else
289                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
290                       goto matched;
291 # endif
292 #else
293                     if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
294                         || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
295                         || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
296                         || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
297                         || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
298                         || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
299                         || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
300                         || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
301                         || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
302                         || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
303                         || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
304                         || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
305                       goto matched;
306 #endif
307                     c = *p++;
308                   }
309 #ifdef _LIBC
310                 else if (c == L_('[') && *p == L_('='))
311                   {
312                     UCHAR str[1];
313                     uint32_t nrules =
314                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
315                     const CHAR *startp = p;
316 
317                     c = *++p;
318                     if (c == L_('\0'))
319                       {
320                         p = startp;
321                         c = L_('[');
322                         goto normal_bracket;
323                       }
324                     str[0] = c;
325 
326                     c = *++p;
327                     if (c != L_('=') || p[1] != L_(']'))
328                       {
329                         p = startp;
330                         c = L_('[');
331                         goto normal_bracket;
332                       }
333                     p += 2;
334 
335                     if (nrules == 0)
336                       {
337                         if ((UCHAR) *n == str[0])
338                           goto matched;
339                       }
340                     else
341                       {
342                         const int32_t *table;
343 # if WIDE_CHAR_VERSION
344                         const int32_t *weights;
345                         const int32_t *extra;
346 # else
347                         const unsigned char *weights;
348                         const unsigned char *extra;
349 # endif
350                         const int32_t *indirect;
351                         int32_t idx;
352                         const UCHAR *cp = (const UCHAR *) str;
353 
354                         /* This #include defines a local function!  */
355 # if WIDE_CHAR_VERSION
356 #  include <locale/weightwc.h>
357 # else
358 #  include <locale/weight.h>
359 # endif
360 
361 # if WIDE_CHAR_VERSION
362                         table = (const int32_t *)
363                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
364                         weights = (const int32_t *)
365                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
366                         extra = (const int32_t *)
367                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
368                         indirect = (const int32_t *)
369                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
370 # else
371                         table = (const int32_t *)
372                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
373                         weights = (const unsigned char *)
374                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
375                         extra = (const unsigned char *)
376                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
377                         indirect = (const int32_t *)
378                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
379 # endif
380 
381                         idx = findidx (&cp);
382                         if (idx != 0)
383                           {
384                             /* We found a table entry.  Now see whether the
385                                character we are currently at has the same
386                                equivalence class value.  */
387                             int len = weights[idx & 0xffffff];
388                             int32_t idx2;
389                             const UCHAR *np = (const UCHAR *) n;
390 
391                             idx2 = findidx (&np);
392                             if (idx2 != 0
393                                 && (idx >> 24) == (idx2 >> 24)
394                                 && len == weights[idx2 & 0xffffff])
395                               {
396                                 int cnt = 0;
397 
398                                 idx &= 0xffffff;
399                                 idx2 &= 0xffffff;
400 
401                                 while (cnt < len
402                                        && (weights[idx + 1 + cnt]
403                                            == weights[idx2 + 1 + cnt]))
404                                   ++cnt;
405 
406                                 if (cnt == len)
407                                   goto matched;
408                               }
409                           }
410                       }
411 
412                     c = *p++;
413                   }
414 #endif
415                 else if (c == L_('\0'))
416                   {
417                     /* [ unterminated, treat as normal character.  */
418                     p = p_init;
419                     n = n_init;
420                     c = L_('[');
421                     goto normal_match;
422                   }
423                 else
424                   {
425 #ifdef _LIBC
426                     bool is_seqval = false;
427 
428                     if (c == L_('[') && *p == L_('.'))
429                       {
430                         uint32_t nrules =
431                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
432                         const CHAR *startp = p;
433                         size_t c1 = 0;
434 
435                         while (1)
436                           {
437                             c = *++p;
438                             if (c == L_('.') && p[1] == L_(']'))
439                               {
440                                 p += 2;
441                                 break;
442                               }
443                             if (c == '\0')
444                               return FNM_NOMATCH;
445                             ++c1;
446                           }
447 
448                         /* We have to handle the symbols differently in
449                            ranges since then the collation sequence is
450                            important.  */
451                         is_range = *p == L_('-') && p[1] != L_('\0');
452 
453                         if (nrules == 0)
454                           {
455                             /* There are no names defined in the collation
456                                data.  Therefore we only accept the trivial
457                                names consisting of the character itself.  */
458                             if (c1 != 1)
459                               return FNM_NOMATCH;
460 
461                             if (!is_range && *n == startp[1])
462                               goto matched;
463 
464                             cold = startp[1];
465                             c = *p++;
466                           }
467                         else
468                           {
469                             int32_t table_size;
470                             const int32_t *symb_table;
471 # ifdef WIDE_CHAR_VERSION
472                             char str[c1];
473                             size_t strcnt;
474 # else
475 #  define str (startp + 1)
476 # endif
477                             const unsigned char *extra;
478                             int32_t idx;
479                             int32_t elem;
480                             int32_t second;
481                             int32_t hash;
482 
483 # ifdef WIDE_CHAR_VERSION
484                             /* We have to convert the name to a single-byte
485                                string.  This is possible since the names
486                                consist of ASCII characters and the internal
487                                representation is UCS4.  */
488                             for (strcnt = 0; strcnt < c1; ++strcnt)
489                               str[strcnt] = startp[1 + strcnt];
490 # endif
491 
492                             table_size =
493                               _NL_CURRENT_WORD (LC_COLLATE,
494                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
495                             symb_table = (const int32_t *)
496                               _NL_CURRENT (LC_COLLATE,
497                                            _NL_COLLATE_SYMB_TABLEMB);
498                             extra = (const unsigned char *)
499                               _NL_CURRENT (LC_COLLATE,
500                                            _NL_COLLATE_SYMB_EXTRAMB);
501 
502                             /* Locate the character in the hashing table.  */
503                             hash = elem_hash (str, c1);
504 
505                             idx = 0;
506                             elem = hash % table_size;
507                             if (symb_table[2 * elem] != 0)
508                               {
509                                 second = hash % (table_size - 2) + 1;
510 
511                                 do
512                                   {
513                                     /* First compare the hashing value.  */
514                                     if (symb_table[2 * elem] == hash
515                                         && (c1
516                                             == extra[symb_table[2 * elem + 1]])
517                                         && memcmp (str,
518                                                    &extra[symb_table[2 * elem
519                                                                      + 1]
520                                                           + 1], c1) == 0)
521                                       {
522                                         /* Yep, this is the entry.  */
523                                         idx = symb_table[2 * elem + 1];
524                                         idx += 1 + extra[idx];
525                                         break;
526                                       }
527 
528                                     /* Next entry.  */
529                                     elem += second;
530                                   }
531                                 while (symb_table[2 * elem] != 0);
532                               }
533 
534                             if (symb_table[2 * elem] != 0)
535                               {
536                                 /* Compare the byte sequence but only if
537                                    this is not part of a range.  */
538 # ifdef WIDE_CHAR_VERSION
539                                 int32_t *wextra;
540 
541                                 idx += 1 + extra[idx];
542                                 /* Adjust for the alignment.  */
543                                 idx = (idx + 3) & ~3;
544 
545                                 wextra = (int32_t *) &extra[idx + 4];
546 # endif
547 
548                                 if (! is_range)
549                                   {
550 # ifdef WIDE_CHAR_VERSION
551                                     for (c1 = 0;
552                                          (int32_t) c1 < wextra[idx];
553                                          ++c1)
554                                       if (n[c1] != wextra[1 + c1])
555                                         break;
556 
557                                     if ((int32_t) c1 == wextra[idx])
558                                       goto matched;
559 # else
560                                     for (c1 = 0; c1 < extra[idx]; ++c1)
561                                       if (n[c1] != extra[1 + c1])
562                                         break;
563 
564                                     if (c1 == extra[idx])
565                                       goto matched;
566 # endif
567                                   }
568 
569                                 /* Get the collation sequence value.  */
570                                 is_seqval = true;
571 # ifdef WIDE_CHAR_VERSION
572                                 cold = wextra[1 + wextra[idx]];
573 # else
574                                 /* Adjust for the alignment.  */
575                                 idx += 1 + extra[idx];
576                                 idx = (idx + 3) & ~4;
577                                 cold = *((int32_t *) &extra[idx]);
578 # endif
579 
580                                 c = *p++;
581                               }
582                             else if (c1 == 1)
583                               {
584                                 /* No valid character.  Match it as a
585                                    single byte.  */
586                                 if (!is_range && *n == str[0])
587                                   goto matched;
588 
589                                 cold = str[0];
590                                 c = *p++;
591                               }
592                             else
593                               return FNM_NOMATCH;
594                           }
595                       }
596                     else
597 # undef str
598 #endif
599                       {
600                         c = FOLD (c);
601                       normal_bracket:
602 
603                         /* We have to handle the symbols differently in
604                            ranges since then the collation sequence is
605                            important.  */
606                         is_range = (*p == L_('-') && p[1] != L_('\0')
607                                     && p[1] != L_(']'));
608 
609                         if (!is_range && c == fn)
610                           goto matched;
611 
612 #if _LIBC
613                         /* This is needed if we goto normal_bracket; from
614                            outside of is_seqval's scope.  */
615                         is_seqval = false;
616 #endif
617 
618                         cold = c;
619                         c = *p++;
620                       }
621 
622                     if (c == L_('-') && *p != L_(']'))
623                       {
624 #if _LIBC
625                         /* We have to find the collation sequence
626                            value for C.  Collation sequence is nothing
627                            we can regularly access.  The sequence
628                            value is defined by the order in which the
629                            definitions of the collation values for the
630                            various characters appear in the source
631                            file.  A strange concept, nowhere
632                            documented.  */
633                         uint32_t fcollseq;
634                         uint32_t lcollseq;
635                         UCHAR cend = *p++;
636 
637 # ifdef WIDE_CHAR_VERSION
638                         /* Search in the 'names' array for the characters.  */
639                         fcollseq = __collseq_table_lookup (collseq, fn);
640                         if (fcollseq == ~((uint32_t) 0))
641                           /* XXX We don't know anything about the character
642                              we are supposed to match.  This means we are
643                              failing.  */
644                           goto range_not_matched;
645 
646                         if (is_seqval)
647                           lcollseq = cold;
648                         else
649                           lcollseq = __collseq_table_lookup (collseq, cold);
650 # else
651                         fcollseq = collseq[fn];
652                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
653 # endif
654 
655                         is_seqval = false;
656                         if (cend == L_('[') && *p == L_('.'))
657                           {
658                             uint32_t nrules =
659                               _NL_CURRENT_WORD (LC_COLLATE,
660                                                 _NL_COLLATE_NRULES);
661                             const CHAR *startp = p;
662                             size_t c1 = 0;
663 
664                             while (1)
665                               {
666                                 c = *++p;
667                                 if (c == L_('.') && p[1] == L_(']'))
668                                   {
669                                     p += 2;
670                                     break;
671                                   }
672                                 if (c == '\0')
673                                   return FNM_NOMATCH;
674                                 ++c1;
675                               }
676 
677                             if (nrules == 0)
678                               {
679                                 /* There are no names defined in the
680                                    collation data.  Therefore we only
681                                    accept the trivial names consisting
682                                    of the character itself.  */
683                                 if (c1 != 1)
684                                   return FNM_NOMATCH;
685 
686                                 cend = startp[1];
687                               }
688                             else
689                               {
690                                 int32_t table_size;
691                                 const int32_t *symb_table;
692 # ifdef WIDE_CHAR_VERSION
693                                 char str[c1];
694                                 size_t strcnt;
695 # else
696 #  define str (startp + 1)
697 # endif
698                                 const unsigned char *extra;
699                                 int32_t idx;
700                                 int32_t elem;
701                                 int32_t second;
702                                 int32_t hash;
703 
704 # ifdef WIDE_CHAR_VERSION
705                                 /* We have to convert the name to a single-byte
706                                    string.  This is possible since the names
707                                    consist of ASCII characters and the internal
708                                    representation is UCS4.  */
709                                 for (strcnt = 0; strcnt < c1; ++strcnt)
710                                   str[strcnt] = startp[1 + strcnt];
711 # endif
712 
713                                 table_size =
714                                   _NL_CURRENT_WORD (LC_COLLATE,
715                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
716                                 symb_table = (const int32_t *)
717                                   _NL_CURRENT (LC_COLLATE,
718                                                _NL_COLLATE_SYMB_TABLEMB);
719                                 extra = (const unsigned char *)
720                                   _NL_CURRENT (LC_COLLATE,
721                                                _NL_COLLATE_SYMB_EXTRAMB);
722 
723                                 /* Locate the character in the hashing
724                                    table.  */
725                                 hash = elem_hash (str, c1);
726 
727                                 idx = 0;
728                                 elem = hash % table_size;
729                                 if (symb_table[2 * elem] != 0)
730                                   {
731                                     second = hash % (table_size - 2) + 1;
732 
733                                     do
734                                       {
735                                         /* First compare the hashing value.  */
736                                         if (symb_table[2 * elem] == hash
737                                             && (c1
738                                                 == extra[symb_table[2 * elem + 1]])
739                                             && memcmp (str,
740                                                        &extra[symb_table[2 * elem + 1]
741                                                               + 1], c1) == 0)
742                                           {
743                                             /* Yep, this is the entry.  */
744                                             idx = symb_table[2 * elem + 1];
745                                             idx += 1 + extra[idx];
746                                             break;
747                                           }
748 
749                                         /* Next entry.  */
750                                         elem += second;
751                                       }
752                                     while (symb_table[2 * elem] != 0);
753                                   }
754 
755                                 if (symb_table[2 * elem] != 0)
756                                   {
757                                     /* Compare the byte sequence but only if
758                                        this is not part of a range.  */
759 # ifdef WIDE_CHAR_VERSION
760                                     int32_t *wextra;
761 
762                                     idx += 1 + extra[idx];
763                                     /* Adjust for the alignment.  */
764                                     idx = (idx + 3) & ~4;
765 
766                                     wextra = (int32_t *) &extra[idx + 4];
767 # endif
768                                     /* Get the collation sequence value.  */
769                                     is_seqval = true;
770 # ifdef WIDE_CHAR_VERSION
771                                     cend = wextra[1 + wextra[idx]];
772 # else
773                                     /* Adjust for the alignment.  */
774                                     idx += 1 + extra[idx];
775                                     idx = (idx + 3) & ~4;
776                                     cend = *((int32_t *) &extra[idx]);
777 # endif
778                                   }
779                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
780                                   {
781                                     cend = str[0];
782                                     c = *p++;
783                                   }
784                                 else
785                                   return FNM_NOMATCH;
786                               }
787 # undef str
788                           }
789                         else
790                           {
791                             if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
792                               cend = *p++;
793                             if (cend == L_('\0'))
794                               return FNM_NOMATCH;
795                             cend = FOLD (cend);
796                           }
797 
798                         /* XXX It is not entirely clear to me how to handle
799                            characters which are not mentioned in the
800                            collation specification.  */
801                         if (
802 # ifdef WIDE_CHAR_VERSION
803                             lcollseq == 0xffffffff ||
804 # endif
805                             lcollseq <= fcollseq)
806                           {
807                             /* We have to look at the upper bound.  */
808                             uint32_t hcollseq;
809 
810                             if (is_seqval)
811                               hcollseq = cend;
812                             else
813                               {
814 # ifdef WIDE_CHAR_VERSION
815                                 hcollseq =
816                                   __collseq_table_lookup (collseq, cend);
817                                 if (hcollseq == ~((uint32_t) 0))
818                                   {
819                                     /* Hum, no information about the upper
820                                        bound.  The matching succeeds if the
821                                        lower bound is matched exactly.  */
822                                     if (lcollseq != fcollseq)
823                                       goto range_not_matched;
824 
825                                     goto matched;
826                                   }
827 # else
828                                 hcollseq = collseq[cend];
829 # endif
830                               }
831 
832                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
833                               goto matched;
834                           }
835 # ifdef WIDE_CHAR_VERSION
836                       range_not_matched:
837 # endif
838 #else
839                         /* We use a boring value comparison of the character
840                            values.  This is better than comparing using
841                            'strcoll' since the latter would have surprising
842                            and sometimes fatal consequences.  */
843                         UCHAR cend = *p++;
844 
845                         if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
846                           cend = *p++;
847                         if (cend == L_('\0'))
848                           return FNM_NOMATCH;
849 
850                         /* It is a range.  */
851                         if (cold <= fn && fn <= cend)
852                           goto matched;
853 #endif
854 
855                         c = *p++;
856                       }
857                   }
858 
859                 if (c == L_(']'))
860                   break;
861               }
862 
863             if (!not)
864               return FNM_NOMATCH;
865             break;
866 
867           matched:
868             /* Skip the rest of the [...] that already matched.  */
869             do
870               {
871               ignore_next:
872                 c = *p++;
873 
874                 if (c == L_('\0'))
875                   /* [... (unterminated) loses.  */
876                   return FNM_NOMATCH;
877 
878                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
879                   {
880                     if (*p == L_('\0'))
881                       return FNM_NOMATCH;
882                     /* XXX 1003.2d11 is unclear if this is right.  */
883                     ++p;
884                   }
885                 else if (c == L_('[') && *p == L_(':'))
886                   {
887                     int c1 = 0;
888                     const CHAR *startp = p;
889 
890                     while (1)
891                       {
892                         c = *++p;
893                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
894                           return FNM_NOMATCH;
895 
896                         if (*p == L_(':') && p[1] == L_(']'))
897                           break;
898 
899                         if (c < L_('a') || c >= L_('z'))
900                           {
901                             p = startp;
902                             goto ignore_next;
903                           }
904                       }
905                     p += 2;
906                     c = *p++;
907                   }
908                 else if (c == L_('[') && *p == L_('='))
909                   {
910                     c = *++p;
911                     if (c == L_('\0'))
912                       return FNM_NOMATCH;
913                     c = *++p;
914                     if (c != L_('=') || p[1] != L_(']'))
915                       return FNM_NOMATCH;
916                     p += 2;
917                     c = *p++;
918                   }
919                 else if (c == L_('[') && *p == L_('.'))
920                   {
921                     ++p;
922                     while (1)
923                       {
924                         c = *++p;
925                         if (c == '\0')
926                           return FNM_NOMATCH;
927 
928                         if (*p == L_('.') && p[1] == L_(']'))
929                           break;
930                       }
931                     p += 2;
932                     c = *p++;
933                   }
934               }
935             while (c != L_(']'));
936             if (not)
937               return FNM_NOMATCH;
938           }
939           break;
940 
941         case L_('+'):
942         case L_('@'):
943         case L_('!'):
944           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
945             {
946               int res;
947 
948               res = EXT (c, p, n, string_end, no_leading_period, flags);
949               if (res != -1)
950                 return res;
951             }
952           goto normal_match;
953 
954         case L_('/'):
955           if (NO_LEADING_PERIOD (flags))
956             {
957               if (n == string_end || c != (UCHAR) *n)
958                 return FNM_NOMATCH;
959 
960               new_no_leading_period = true;
961               break;
962             }
963           FALLTHROUGH;
964         default:
965         normal_match:
966           if (n == string_end || c != FOLD ((UCHAR) *n))
967             return FNM_NOMATCH;
968         }
969 
970       no_leading_period = new_no_leading_period;
971       ++n;
972     }
973 
974   if (n == string_end)
975     return 0;
976 
977   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
978     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
979     return 0;
980 
981   return FNM_NOMATCH;
982 }
983 
984 
985 static const CHAR *
986 internal_function
987 END (const CHAR *pattern)
988 {
989   const CHAR *p = pattern;
990 
991   while (1)
992     if (*++p == L_('\0'))
993       /* This is an invalid pattern.  */
994       return pattern;
995     else if (*p == L_('['))
996       {
997         /* Handle brackets special.  */
998         if (posixly_correct == 0)
999           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1000 
1001         /* Skip the not sign.  We have to recognize it because of a possibly
1002            following ']'.  */
1003         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1004           ++p;
1005         /* A leading ']' is recognized as such.  */
1006         if (*p == L_(']'))
1007           ++p;
1008         /* Skip over all characters of the list.  */
1009         while (*p != L_(']'))
1010           if (*p++ == L_('\0'))
1011             /* This is no valid pattern.  */
1012             return pattern;
1013       }
1014     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1015               || *p == L_('!')) && p[1] == L_('('))
1016       p = END (p + 1);
1017     else if (*p == L_(')'))
1018       break;
1019 
1020   return p + 1;
1021 }
1022 
1023 
1024 static int
1025 internal_function
1026 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1027      bool no_leading_period, int flags)
1028 {
1029   const CHAR *startp;
1030   size_t level;
1031   struct patternlist
1032   {
1033     struct patternlist *next;
1034     CHAR str[FLEXIBLE_ARRAY_MEMBER];
1035   } *list = NULL;
1036   struct patternlist **lastp = &list;
1037   size_t pattern_len = STRLEN (pattern);
1038   const CHAR *p;
1039   const CHAR *rs;
1040   enum { ALLOCA_LIMIT = 8000 };
1041 
1042   /* Parse the pattern.  Store the individual parts in the list.  */
1043   level = 0;
1044   for (startp = p = pattern + 1; ; ++p)
1045     if (*p == L_('\0'))
1046       /* This is an invalid pattern.  */
1047       return -1;
1048     else if (*p == L_('['))
1049       {
1050         /* Handle brackets special.  */
1051         if (posixly_correct == 0)
1052           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1053 
1054         /* Skip the not sign.  We have to recognize it because of a possibly
1055            following ']'.  */
1056         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1057           ++p;
1058         /* A leading ']' is recognized as such.  */
1059         if (*p == L_(']'))
1060           ++p;
1061         /* Skip over all characters of the list.  */
1062         while (*p != L_(']'))
1063           if (*p++ == L_('\0'))
1064             /* This is no valid pattern.  */
1065             return -1;
1066       }
1067     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1068               || *p == L_('!')) && p[1] == L_('('))
1069       /* Remember the nesting level.  */
1070       ++level;
1071     else if (*p == L_(')'))
1072       {
1073         if (level-- == 0)
1074           {
1075             /* This means we found the end of the pattern.  */
1076 #define NEW_PATTERN \
1077             struct patternlist *newp;                                         \
1078             size_t plen;                                                      \
1079             size_t plensize;                                                  \
1080             size_t newpsize;                                                  \
1081                                                                               \
1082             plen = (opt == L_('?') || opt == L_('@')                          \
1083                     ? pattern_len                                             \
1084                     : p - startp + 1UL);                                      \
1085             plensize = plen * sizeof (CHAR);                                  \
1086             newpsize = FLEXSIZEOF (struct patternlist, str, plensize);        \
1087             if ((size_t) -1 / sizeof (CHAR) < plen                            \
1088                 || newpsize < offsetof (struct patternlist, str)              \
1089                 || ALLOCA_LIMIT <= newpsize)                                  \
1090               return -1;                                                      \
1091             newp = (struct patternlist *) alloca (newpsize);                  \
1092             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0');    \
1093             newp->next = NULL;                                                \
1094             *lastp = newp;                                                    \
1095             lastp = &newp->next
1096             NEW_PATTERN;
1097             break;
1098           }
1099       }
1100     else if (*p == L_('|'))
1101       {
1102         if (level == 0)
1103           {
1104             NEW_PATTERN;
1105             startp = p + 1;
1106           }
1107       }
1108   assert (list != NULL);
1109   assert (p[-1] == L_(')'));
1110 #undef NEW_PATTERN
1111 
1112   switch (opt)
1113     {
1114     case L_('*'):
1115       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1116         return 0;
1117       FALLTHROUGH;
1118     case L_('+'):
1119       do
1120         {
1121           for (rs = string; rs <= string_end; ++rs)
1122             /* First match the prefix with the current pattern with the
1123                current pattern.  */
1124             if (FCT (list->str, string, rs, no_leading_period,
1125                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1126                 /* This was successful.  Now match the rest with the rest
1127                    of the pattern.  */
1128                 && (FCT (p, rs, string_end,
1129                          rs == string
1130                          ? no_leading_period
1131                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1132                          flags & FNM_FILE_NAME
1133                          ? flags : flags & ~FNM_PERIOD) == 0
1134                     /* This didn't work.  Try the whole pattern.  */
1135                     || (rs != string
1136                         && FCT (pattern - 1, rs, string_end,
1137                                 rs == string
1138                                 ? no_leading_period
1139                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1140                                 flags & FNM_FILE_NAME
1141                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1142               /* It worked.  Signal success.  */
1143               return 0;
1144         }
1145       while ((list = list->next) != NULL);
1146 
1147       /* None of the patterns lead to a match.  */
1148       return FNM_NOMATCH;
1149 
1150     case L_('?'):
1151       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1152         return 0;
1153       FALLTHROUGH;
1154     case L_('@'):
1155       do
1156         /* I cannot believe it but 'strcat' is actually acceptable
1157            here.  Match the entire string with the prefix from the
1158            pattern list and the rest of the pattern following the
1159            pattern list.  */
1160         if (FCT (STRCAT (list->str, p), string, string_end,
1161                  no_leading_period,
1162                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1163           /* It worked.  Signal success.  */
1164           return 0;
1165       while ((list = list->next) != NULL);
1166 
1167       /* None of the patterns lead to a match.  */
1168       return FNM_NOMATCH;
1169 
1170     case L_('!'):
1171       for (rs = string; rs <= string_end; ++rs)
1172         {
1173           struct patternlist *runp;
1174 
1175           for (runp = list; runp != NULL; runp = runp->next)
1176             if (FCT (runp->str, string, rs,  no_leading_period,
1177                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1178               break;
1179 
1180           /* If none of the patterns matched see whether the rest does.  */
1181           if (runp == NULL
1182               && (FCT (p, rs, string_end,
1183                        rs == string
1184                        ? no_leading_period
1185                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1186                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1187                   == 0))
1188             /* This is successful.  */
1189             return 0;
1190         }
1191 
1192       /* None of the patterns together with the rest of the pattern
1193          lead to a match.  */
1194       return FNM_NOMATCH;
1195 
1196     default:
1197       assert (! "Invalid extended matching operator");
1198       break;
1199     }
1200 
1201   return -1;
1202 }
1203 
1204 
1205 #undef FOLD
1206 #undef CHAR
1207 #undef UCHAR
1208 #undef INT
1209 #undef FCT
1210 #undef EXT
1211 #undef END
1212 #undef MEMPCPY
1213 #undef MEMCHR
1214 #undef STRLEN
1215 #undef STRCAT
1216 #undef L_
1217 #undef BTOWC
1218