xref: /dflybsd-src/contrib/diffutils/lib/fnmatch_loop.c (revision a816ce8e217e98e622474a79fb633b37dc22092e)
1 /* -*- buffer-read-only: t -*- vi: set ro: */
2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
3 /* Copyright (C) 1991-1993, 1996-2006, 2009-2011 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software Foundation,
18    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
19 
20 /* Match STRING against the file name pattern PATTERN, returning zero if
21    it matches, nonzero if not.  */
/* Forward declaration of the extended-pattern matcher.  FCT below calls it
   when FNM_EXTMATCH is set in FLAGS and a `?' or `*' in the pattern is
   directly followed by `(', passing that operator character as OPT.  FCT
   treats a result of -1 as "fall back to the ordinary handling of the
   character"; any other result is returned to FCT's caller unchanged.  */
22 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
23                 const CHAR *string_end, bool no_leading_period, int flags)
24      internal_function;
/* Forward declaration of the helper FCT uses, under FNM_EXTMATCH, while
   skipping the `?'/`*' run that follows a `*': it is called with a pointer
   at a `('.  FCT treats a result equal to PATTERNP as "this is not an
   extended pattern"; otherwise it resumes scanning the pattern at the
   returned pointer.  */
25 static const CHAR *END (const CHAR *patternp) internal_function;
26 
27 static int
28 internal_function
29 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
30      bool no_leading_period, int flags)
31 {
32   register const CHAR *p = pattern, *n = string;
33   register UCHAR c;
34 #ifdef _LIBC
35 # if WIDE_CHAR_VERSION
36   const char *collseq = (const char *)
37     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
38 # else
39   const UCHAR *collseq = (const UCHAR *)
40     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
41 # endif
42 #endif
43 
44   while ((c = *p++) != L_('\0'))
45     {
46       bool new_no_leading_period = false;
47       c = FOLD (c);
48 
49       switch (c)
50         {
51         case L_('?'):
52           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
53             {
54               int res;
55 
56               res = EXT (c, p, n, string_end, no_leading_period,
57                          flags);
58               if (res != -1)
59                 return res;
60             }
61 
62           if (n == string_end)
63             return FNM_NOMATCH;
64           else if (*n == L_('/') && (flags & FNM_FILE_NAME))
65             return FNM_NOMATCH;
66           else if (*n == L_('.') && no_leading_period)
67             return FNM_NOMATCH;
68           break;
69 
70         case L_('\\'):
71           if (!(flags & FNM_NOESCAPE))
72             {
73               c = *p++;
74               if (c == L_('\0'))
75                 /* Trailing \ loses.  */
76                 return FNM_NOMATCH;
77               c = FOLD (c);
78             }
79           if (n == string_end || FOLD ((UCHAR) *n) != c)
80             return FNM_NOMATCH;
81           break;
82 
83         case L_('*'):
84           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
85             {
86               int res;
87 
88               res = EXT (c, p, n, string_end, no_leading_period,
89                          flags);
90               if (res != -1)
91                 return res;
92             }
93 
94           if (n != string_end && *n == L_('.') && no_leading_period)
95             return FNM_NOMATCH;
96 
97           for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
98             {
99               if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
100                 {
101                   const CHAR *endp = END (p);
102                   if (endp != p)
103                     {
104                       /* This is a pattern.  Skip over it.  */
105                       p = endp;
106                       continue;
107                     }
108                 }
109 
110               if (c == L_('?'))
111                 {
112                   /* A ? needs to match one character.  */
113                   if (n == string_end)
114                     /* There isn't another character; no match.  */
115                     return FNM_NOMATCH;
116                   else if (*n == L_('/')
117                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
118                     /* A slash does not match a wildcard under
119                        FNM_FILE_NAME.  */
120                     return FNM_NOMATCH;
121                   else
122                     /* One character of the string is consumed in matching
123                        this ? wildcard, so *??? won't match if there are
124                        less than three characters.  */
125                     ++n;
126                 }
127             }
128 
129           if (c == L_('\0'))
130             /* The wildcard(s) is/are the last element of the pattern.
131                If the name is a file name and contains another slash
132                this means it cannot match, unless the FNM_LEADING_DIR
133                flag is set.  */
134             {
135               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
136 
137               if (flags & FNM_FILE_NAME)
138                 {
139                   if (flags & FNM_LEADING_DIR)
140                     result = 0;
141                   else
142                     {
143                       if (MEMCHR (n, L_('/'), string_end - n) == NULL)
144                         result = 0;
145                     }
146                 }
147 
148               return result;
149             }
150           else
151             {
152               const CHAR *endp;
153 
154               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
155                              string_end - n);
156               if (endp == NULL)
157                 endp = string_end;
158 
159               if (c == L_('[')
160                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
161                       && (c == L_('@') || c == L_('+') || c == L_('!'))
162                       && *p == L_('(')))
163                 {
164                   int flags2 = ((flags & FNM_FILE_NAME)
165                                 ? flags : (flags & ~FNM_PERIOD));
166                   bool no_leading_period2 = no_leading_period;
167 
168                   for (--p; n < endp; ++n, no_leading_period2 = false)
169                     if (FCT (p, n, string_end, no_leading_period2, flags2)
170                         == 0)
171                       return 0;
172                 }
173               else if (c == L_('/') && (flags & FNM_FILE_NAME))
174                 {
175                   while (n < string_end && *n != L_('/'))
176                     ++n;
177                   if (n < string_end && *n == L_('/')
178                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
179                           == 0))
180                     return 0;
181                 }
182               else
183                 {
184                   int flags2 = ((flags & FNM_FILE_NAME)
185                                 ? flags : (flags & ~FNM_PERIOD));
186                   int no_leading_period2 = no_leading_period;
187 
188                   if (c == L_('\\') && !(flags & FNM_NOESCAPE))
189                     c = *p;
190                   c = FOLD (c);
191                   for (--p; n < endp; ++n, no_leading_period2 = false)
192                     if (FOLD ((UCHAR) *n) == c
193                         && (FCT (p, n, string_end, no_leading_period2, flags2)
194                             == 0))
195                       return 0;
196                 }
197             }
198 
199           /* If we come here no match is possible with the wildcard.  */
200           return FNM_NOMATCH;
201 
202         case L_('['):
203           {
204             /* Nonzero if the sense of the character class is inverted.  */
205             const CHAR *p_init = p;
206             const CHAR *n_init = n;
207             register bool not;
208             CHAR cold;
209             UCHAR fn;
210 
211             if (posixly_correct == 0)
212               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
213 
214             if (n == string_end)
215               return FNM_NOMATCH;
216 
217             if (*n == L_('.') && no_leading_period)
218               return FNM_NOMATCH;
219 
220             if (*n == L_('/') && (flags & FNM_FILE_NAME))
221               /* `/' cannot be matched.  */
222               return FNM_NOMATCH;
223 
224             not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
225             if (not)
226               ++p;
227 
228             fn = FOLD ((UCHAR) *n);
229 
230             c = *p++;
231             for (;;)
232               {
233                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
234                   {
235                     if (*p == L_('\0'))
236                       return FNM_NOMATCH;
237                     c = FOLD ((UCHAR) *p);
238                     ++p;
239 
240                     goto normal_bracket;
241                   }
242                 else if (c == L_('[') && *p == L_(':'))
243                   {
244                     /* Leave room for the null.  */
245                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
246                     size_t c1 = 0;
247 #if defined _LIBC || WIDE_CHAR_SUPPORT
248                     wctype_t wt;
249 #endif
250                     const CHAR *startp = p;
251 
252                     for (;;)
253                       {
254                         if (c1 == CHAR_CLASS_MAX_LENGTH)
255                           /* The name is too long and therefore the pattern
256                              is ill-formed.  */
257                           return FNM_NOMATCH;
258 
259                         c = *++p;
260                         if (c == L_(':') && p[1] == L_(']'))
261                           {
262                             p += 2;
263                             break;
264                           }
265                         if (c < L_('a') || c >= L_('z'))
266                           {
267                             /* This cannot possibly be a character class name.
268                                Match it as a normal range.  */
269                             p = startp;
270                             c = L_('[');
271                             goto normal_bracket;
272                           }
273                         str[c1++] = c;
274                       }
275                     str[c1] = L_('\0');
276 
277 #if defined _LIBC || WIDE_CHAR_SUPPORT
278                     wt = IS_CHAR_CLASS (str);
279                     if (wt == 0)
280                       /* Invalid character class name.  */
281                       return FNM_NOMATCH;
282 
283 # if defined _LIBC && ! WIDE_CHAR_VERSION
284                     /* The following code is glibc specific but does
285                        there a good job in speeding up the code since
286                        we can avoid the btowc() call.  */
287                     if (_ISCTYPE ((UCHAR) *n, wt))
288                       goto matched;
289 # else
290                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
291                       goto matched;
292 # endif
293 #else
294                     if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
295                         || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
296                         || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
297                         || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
298                         || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
299                         || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
300                         || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
301                         || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
302                         || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
303                         || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
304                         || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
305                         || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
306                       goto matched;
307 #endif
308                     c = *p++;
309                   }
310 #ifdef _LIBC
311                 else if (c == L_('[') && *p == L_('='))
312                   {
313                     UCHAR str[1];
314                     uint32_t nrules =
315                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
316                     const CHAR *startp = p;
317 
318                     c = *++p;
319                     if (c == L_('\0'))
320                       {
321                         p = startp;
322                         c = L_('[');
323                         goto normal_bracket;
324                       }
325                     str[0] = c;
326 
327                     c = *++p;
328                     if (c != L_('=') || p[1] != L_(']'))
329                       {
330                         p = startp;
331                         c = L_('[');
332                         goto normal_bracket;
333                       }
334                     p += 2;
335 
336                     if (nrules == 0)
337                       {
338                         if ((UCHAR) *n == str[0])
339                           goto matched;
340                       }
341                     else
342                       {
343                         const int32_t *table;
344 # if WIDE_CHAR_VERSION
345                         const int32_t *weights;
346                         const int32_t *extra;
347 # else
348                         const unsigned char *weights;
349                         const unsigned char *extra;
350 # endif
351                         const int32_t *indirect;
352                         int32_t idx;
353                         const UCHAR *cp = (const UCHAR *) str;
354 
355                         /* This #include defines a local function!  */
356 # if WIDE_CHAR_VERSION
357 #  include <locale/weightwc.h>
358 # else
359 #  include <locale/weight.h>
360 # endif
361 
362 # if WIDE_CHAR_VERSION
363                         table = (const int32_t *)
364                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
365                         weights = (const int32_t *)
366                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
367                         extra = (const int32_t *)
368                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
369                         indirect = (const int32_t *)
370                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
371 # else
372                         table = (const int32_t *)
373                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
374                         weights = (const unsigned char *)
375                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
376                         extra = (const unsigned char *)
377                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
378                         indirect = (const int32_t *)
379                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
380 # endif
381 
382                         idx = findidx (&cp);
383                         if (idx != 0)
384                           {
385                             /* We found a table entry.  Now see whether the
386                                character we are currently at has the same
387                                equivalance class value.  */
388                             int len = weights[idx & 0xffffff];
389                             int32_t idx2;
390                             const UCHAR *np = (const UCHAR *) n;
391 
392                             idx2 = findidx (&np);
393                             if (idx2 != 0
394                                 && (idx >> 24) == (idx2 >> 24)
395                                 && len == weights[idx2 & 0xffffff])
396                               {
397                                 int cnt = 0;
398 
399                                 idx &= 0xffffff;
400                                 idx2 &= 0xffffff;
401 
402                                 while (cnt < len
403                                        && (weights[idx + 1 + cnt]
404                                            == weights[idx2 + 1 + cnt]))
405                                   ++cnt;
406 
407                                 if (cnt == len)
408                                   goto matched;
409                               }
410                           }
411                       }
412 
413                     c = *p++;
414                   }
415 #endif
416                 else if (c == L_('\0'))
417                   {
418                     /* [ unterminated, treat as normal character.  */
419                     p = p_init;
420                     n = n_init;
421                     c = L_('[');
422                     goto normal_match;
423                   }
424                 else
425                   {
426                     bool is_range = false;
427 
428 #ifdef _LIBC
429                     bool is_seqval = false;
430 
431                     if (c == L_('[') && *p == L_('.'))
432                       {
433                         uint32_t nrules =
434                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
435                         const CHAR *startp = p;
436                         size_t c1 = 0;
437 
438                         while (1)
439                           {
440                             c = *++p;
441                             if (c == L_('.') && p[1] == L_(']'))
442                               {
443                                 p += 2;
444                                 break;
445                               }
446                             if (c == '\0')
447                               return FNM_NOMATCH;
448                             ++c1;
449                           }
450 
451                         /* We have to handling the symbols differently in
452                            ranges since then the collation sequence is
453                            important.  */
454                         is_range = *p == L_('-') && p[1] != L_('\0');
455 
456                         if (nrules == 0)
457                           {
458                             /* There are no names defined in the collation
459                                data.  Therefore we only accept the trivial
460                                names consisting of the character itself.  */
461                             if (c1 != 1)
462                               return FNM_NOMATCH;
463 
464                             if (!is_range && *n == startp[1])
465                               goto matched;
466 
467                             cold = startp[1];
468                             c = *p++;
469                           }
470                         else
471                           {
472                             int32_t table_size;
473                             const int32_t *symb_table;
474 # ifdef WIDE_CHAR_VERSION
475                             char str[c1];
476                             size_t strcnt;
477 # else
478 #  define str (startp + 1)
479 # endif
480                             const unsigned char *extra;
481                             int32_t idx;
482                             int32_t elem;
483                             int32_t second;
484                             int32_t hash;
485 
486 # ifdef WIDE_CHAR_VERSION
487                             /* We have to convert the name to a single-byte
488                                string.  This is possible since the names
489                                consist of ASCII characters and the internal
490                                representation is UCS4.  */
491                             for (strcnt = 0; strcnt < c1; ++strcnt)
492                               str[strcnt] = startp[1 + strcnt];
493 # endif
494 
495                             table_size =
496                               _NL_CURRENT_WORD (LC_COLLATE,
497                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
498                             symb_table = (const int32_t *)
499                               _NL_CURRENT (LC_COLLATE,
500                                            _NL_COLLATE_SYMB_TABLEMB);
501                             extra = (const unsigned char *)
502                               _NL_CURRENT (LC_COLLATE,
503                                            _NL_COLLATE_SYMB_EXTRAMB);
504 
505                             /* Locate the character in the hashing table.  */
506                             hash = elem_hash (str, c1);
507 
508                             idx = 0;
509                             elem = hash % table_size;
510                             if (symb_table[2 * elem] != 0)
511                               {
512                                 second = hash % (table_size - 2) + 1;
513 
514                                 do
515                                   {
516                                     /* First compare the hashing value.  */
517                                     if (symb_table[2 * elem] == hash
518                                         && (c1
519                                             == extra[symb_table[2 * elem + 1]])
520                                         && memcmp (str,
521                                                    &extra[symb_table[2 * elem
522                                                                      + 1]
523                                                           + 1], c1) == 0)
524                                       {
525                                         /* Yep, this is the entry.  */
526                                         idx = symb_table[2 * elem + 1];
527                                         idx += 1 + extra[idx];
528                                         break;
529                                       }
530 
531                                     /* Next entry.  */
532                                     elem += second;
533                                   }
534                                 while (symb_table[2 * elem] != 0);
535                               }
536 
537                             if (symb_table[2 * elem] != 0)
538                               {
539                                 /* Compare the byte sequence but only if
540                                    this is not part of a range.  */
541 # ifdef WIDE_CHAR_VERSION
542                                 int32_t *wextra;
543 
544                                 idx += 1 + extra[idx];
545                                 /* Adjust for the alignment.  */
546                                 idx = (idx + 3) & ~3;
547 
548                                 wextra = (int32_t *) &extra[idx + 4];
549 # endif
550 
551                                 if (! is_range)
552                                   {
553 # ifdef WIDE_CHAR_VERSION
554                                     for (c1 = 0;
555                                          (int32_t) c1 < wextra[idx];
556                                          ++c1)
557                                       if (n[c1] != wextra[1 + c1])
558                                         break;
559 
560                                     if ((int32_t) c1 == wextra[idx])
561                                       goto matched;
562 # else
563                                     for (c1 = 0; c1 < extra[idx]; ++c1)
564                                       if (n[c1] != extra[1 + c1])
565                                         break;
566 
567                                     if (c1 == extra[idx])
568                                       goto matched;
569 # endif
570                                   }
571 
572                                 /* Get the collation sequence value.  */
573                                 is_seqval = true;
574 # ifdef WIDE_CHAR_VERSION
575                                 cold = wextra[1 + wextra[idx]];
576 # else
577                                 /* Adjust for the alignment.  */
578                                 idx += 1 + extra[idx];
579                                 idx = (idx + 3) & ~4;
580                                 cold = *((int32_t *) &extra[idx]);
581 # endif
582 
583                                 c = *p++;
584                               }
585                             else if (c1 == 1)
586                               {
587                                 /* No valid character.  Match it as a
588                                    single byte.  */
589                                 if (!is_range && *n == str[0])
590                                   goto matched;
591 
592                                 cold = str[0];
593                                 c = *p++;
594                               }
595                             else
596                               return FNM_NOMATCH;
597                           }
598                       }
599                     else
600 # undef str
601 #endif
602                       {
603                         c = FOLD (c);
604                       normal_bracket:
605 
606                         /* We have to handling the symbols differently in
607                            ranges since then the collation sequence is
608                            important.  */
609                         is_range = (*p == L_('-') && p[1] != L_('\0')
610                                     && p[1] != L_(']'));
611 
612                         if (!is_range && c == fn)
613                           goto matched;
614 
615 #if _LIBC
616                         /* This is needed if we goto normal_bracket; from
617                            outside of is_seqval's scope.  */
618                         is_seqval = false;
619 #endif
620 
621                         cold = c;
622                         c = *p++;
623                       }
624 
625                     if (c == L_('-') && *p != L_(']'))
626                       {
627 #if _LIBC
628                         /* We have to find the collation sequence
629                            value for C.  Collation sequence is nothing
630                            we can regularly access.  The sequence
631                            value is defined by the order in which the
632                            definitions of the collation values for the
633                            various characters appear in the source
634                            file.  A strange concept, nowhere
635                            documented.  */
636                         uint32_t fcollseq;
637                         uint32_t lcollseq;
638                         UCHAR cend = *p++;
639 
640 # ifdef WIDE_CHAR_VERSION
641                         /* Search in the `names' array for the characters.  */
642                         fcollseq = __collseq_table_lookup (collseq, fn);
643                         if (fcollseq == ~((uint32_t) 0))
644                           /* XXX We don't know anything about the character
645                              we are supposed to match.  This means we are
646                              failing.  */
647                           goto range_not_matched;
648 
649                         if (is_seqval)
650                           lcollseq = cold;
651                         else
652                           lcollseq = __collseq_table_lookup (collseq, cold);
653 # else
654                         fcollseq = collseq[fn];
655                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
656 # endif
657 
658                         is_seqval = false;
659                         if (cend == L_('[') && *p == L_('.'))
660                           {
661                             uint32_t nrules =
662                               _NL_CURRENT_WORD (LC_COLLATE,
663                                                 _NL_COLLATE_NRULES);
664                             const CHAR *startp = p;
665                             size_t c1 = 0;
666 
667                             while (1)
668                               {
669                                 c = *++p;
670                                 if (c == L_('.') && p[1] == L_(']'))
671                                   {
672                                     p += 2;
673                                     break;
674                                   }
675                                 if (c == '\0')
676                                   return FNM_NOMATCH;
677                                 ++c1;
678                               }
679 
680                             if (nrules == 0)
681                               {
682                                 /* There are no names defined in the
683                                    collation data.  Therefore we only
684                                    accept the trivial names consisting
685                                    of the character itself.  */
686                                 if (c1 != 1)
687                                   return FNM_NOMATCH;
688 
689                                 cend = startp[1];
690                               }
691                             else
692                               {
693                                 int32_t table_size;
694                                 const int32_t *symb_table;
695 # ifdef WIDE_CHAR_VERSION
696                                 char str[c1];
697                                 size_t strcnt;
698 # else
699 #  define str (startp + 1)
700 # endif
701                                 const unsigned char *extra;
702                                 int32_t idx;
703                                 int32_t elem;
704                                 int32_t second;
705                                 int32_t hash;
706 
707 # ifdef WIDE_CHAR_VERSION
708                                 /* We have to convert the name to a single-byte
709                                    string.  This is possible since the names
710                                    consist of ASCII characters and the internal
711                                    representation is UCS4.  */
712                                 for (strcnt = 0; strcnt < c1; ++strcnt)
713                                   str[strcnt] = startp[1 + strcnt];
714 # endif
715 
716                                 table_size =
717                                   _NL_CURRENT_WORD (LC_COLLATE,
718                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
719                                 symb_table = (const int32_t *)
720                                   _NL_CURRENT (LC_COLLATE,
721                                                _NL_COLLATE_SYMB_TABLEMB);
722                                 extra = (const unsigned char *)
723                                   _NL_CURRENT (LC_COLLATE,
724                                                _NL_COLLATE_SYMB_EXTRAMB);
725 
726                                 /* Locate the character in the hashing
727                                    table.  */
728                                 hash = elem_hash (str, c1);
729 
730                                 idx = 0;
731                                 elem = hash % table_size;
732                                 if (symb_table[2 * elem] != 0)
733                                   {
734                                     second = hash % (table_size - 2) + 1;
735 
736                                     do
737                                       {
738                                         /* First compare the hashing value.  */
739                                         if (symb_table[2 * elem] == hash
740                                             && (c1
741                                                 == extra[symb_table[2 * elem + 1]])
742                                             && memcmp (str,
743                                                        &extra[symb_table[2 * elem + 1]
744                                                               + 1], c1) == 0)
745                                           {
746                                             /* Yep, this is the entry.  */
747                                             idx = symb_table[2 * elem + 1];
748                                             idx += 1 + extra[idx];
749                                             break;
750                                           }
751 
752                                         /* Next entry.  */
753                                         elem += second;
754                                       }
755                                     while (symb_table[2 * elem] != 0);
756                                   }
757 
758                                 if (symb_table[2 * elem] != 0)
759                                   {
760                                     /* Compare the byte sequence but only if
761                                        this is not part of a range.  */
762 # ifdef WIDE_CHAR_VERSION
763                                     int32_t *wextra;
764 
765                                     idx += 1 + extra[idx];
766                                     /* Adjust for the alignment.  */
767                                     idx = (idx + 3) & ~4;
768 
769                                     wextra = (int32_t *) &extra[idx + 4];
770 # endif
771                                     /* Get the collation sequence value.  */
772                                     is_seqval = true;
773 # ifdef WIDE_CHAR_VERSION
774                                     cend = wextra[1 + wextra[idx]];
775 # else
776                                     /* Adjust for the alignment.  */
777                                     idx += 1 + extra[idx];
778                                     idx = (idx + 3) & ~4;
779                                     cend = *((int32_t *) &extra[idx]);
780 # endif
781                                   }
782                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
783                                   {
784                                     cend = str[0];
785                                     c = *p++;
786                                   }
787                                 else
788                                   return FNM_NOMATCH;
789                               }
790 # undef str
791                           }
792                         else
793                           {
794                             if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
795                               cend = *p++;
796                             if (cend == L_('\0'))
797                               return FNM_NOMATCH;
798                             cend = FOLD (cend);
799                           }
800 
801                         /* XXX It is not entirely clear to me how to handle
802                            characters which are not mentioned in the
803                            collation specification.  */
804                         if (
805 # ifdef WIDE_CHAR_VERSION
806                             lcollseq == 0xffffffff ||
807 # endif
808                             lcollseq <= fcollseq)
809                           {
810                             /* We have to look at the upper bound.  */
811                             uint32_t hcollseq;
812 
813                             if (is_seqval)
814                               hcollseq = cend;
815                             else
816                               {
817 # ifdef WIDE_CHAR_VERSION
818                                 hcollseq =
819                                   __collseq_table_lookup (collseq, cend);
820                                 if (hcollseq == ~((uint32_t) 0))
821                                   {
822                                     /* Hum, no information about the upper
823                                        bound.  The matching succeeds if the
824                                        lower bound is matched exactly.  */
825                                     if (lcollseq != fcollseq)
826                                       goto range_not_matched;
827 
828                                     goto matched;
829                                   }
830 # else
831                                 hcollseq = collseq[cend];
832 # endif
833                               }
834 
835                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
836                               goto matched;
837                           }
838 # ifdef WIDE_CHAR_VERSION
839                       range_not_matched:
840 # endif
841 #else
842                         /* We use a boring value comparison of the character
843                            values.  This is better than comparing using
844                            `strcoll' since the latter would have surprising
845                            and sometimes fatal consequences.  */
846                         UCHAR cend = *p++;
847 
848                         if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
849                           cend = *p++;
850                         if (cend == L_('\0'))
851                           return FNM_NOMATCH;
852 
853                         /* It is a range.  */
854                         if (cold <= fn && fn <= cend)
855                           goto matched;
856 #endif
857 
858                         c = *p++;
859                       }
860                   }
861 
862                 if (c == L_(']'))
863                   break;
864               }
865 
866             if (!not)
867               return FNM_NOMATCH;
868             break;
869 
870           matched:
871             /* Skip the rest of the [...] that already matched.  */
872             do
873               {
874               ignore_next:
875                 c = *p++;
876 
877                 if (c == L_('\0'))
878                   /* [... (unterminated) loses.  */
879                   return FNM_NOMATCH;
880 
881                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
882                   {
883                     if (*p == L_('\0'))
884                       return FNM_NOMATCH;
885                     /* XXX 1003.2d11 is unclear if this is right.  */
886                     ++p;
887                   }
888                 else if (c == L_('[') && *p == L_(':'))
889                   {
890                     int c1 = 0;
891                     const CHAR *startp = p;
892 
893                     while (1)
894                       {
895                         c = *++p;
896                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
897                           return FNM_NOMATCH;
898 
899                         if (*p == L_(':') && p[1] == L_(']'))
900                           break;
901 
902                         if (c < L_('a') || c >= L_('z'))
903                           {
904                             p = startp;
905                             goto ignore_next;
906                           }
907                       }
908                     p += 2;
909                     c = *p++;
910                   }
911                 else if (c == L_('[') && *p == L_('='))
912                   {
913                     c = *++p;
914                     if (c == L_('\0'))
915                       return FNM_NOMATCH;
916                     c = *++p;
917                     if (c != L_('=') || p[1] != L_(']'))
918                       return FNM_NOMATCH;
919                     p += 2;
920                     c = *p++;
921                   }
922                 else if (c == L_('[') && *p == L_('.'))
923                   {
924                     ++p;
925                     while (1)
926                       {
927                         c = *++p;
928                         if (c == '\0')
929                           return FNM_NOMATCH;
930 
931                         if (*p == L_('.') && p[1] == L_(']'))
932                           break;
933                       }
934                     p += 2;
935                     c = *p++;
936                   }
937               }
938             while (c != L_(']'));
939             if (not)
940               return FNM_NOMATCH;
941           }
942           break;
943 
944         case L_('+'):
945         case L_('@'):
946         case L_('!'):
947           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
948             {
949               int res;
950 
951               res = EXT (c, p, n, string_end, no_leading_period, flags);
952               if (res != -1)
953                 return res;
954             }
955           goto normal_match;
956 
957         case L_('/'):
958           if (NO_LEADING_PERIOD (flags))
959             {
960               if (n == string_end || c != (UCHAR) *n)
961                 return FNM_NOMATCH;
962 
963               new_no_leading_period = true;
964               break;
965             }
966           /* FALLTHROUGH */
967         default:
968         normal_match:
969           if (n == string_end || c != FOLD ((UCHAR) *n))
970             return FNM_NOMATCH;
971         }
972 
973       no_leading_period = new_no_leading_period;
974       ++n;
975     }
976 
977   if (n == string_end)
978     return 0;
979 
980   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
981     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
982     return 0;
983 
984   return FNM_NOMATCH;
985 }
986 
987 
988 static const CHAR *
989 internal_function
990 END (const CHAR *pattern)
991 {
992   const CHAR *p = pattern;
993 
994   while (1)
995     if (*++p == L_('\0'))
996       /* This is an invalid pattern.  */
997       return pattern;
998     else if (*p == L_('['))
999       {
1000         /* Handle brackets special.  */
1001         if (posixly_correct == 0)
1002           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1003 
1004         /* Skip the not sign.  We have to recognize it because of a possibly
1005            following ']'.  */
1006         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1007           ++p;
1008         /* A leading ']' is recognized as such.  */
1009         if (*p == L_(']'))
1010           ++p;
1011         /* Skip over all characters of the list.  */
1012         while (*p != L_(']'))
1013           if (*p++ == L_('\0'))
1014             /* This is no valid pattern.  */
1015             return pattern;
1016       }
1017     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1018               || *p == L_('!')) && p[1] == L_('('))
1019       p = END (p + 1);
1020     else if (*p == L_(')'))
1021       break;
1022 
1023   return p + 1;
1024 }
1025 
1026 
1027 static int
1028 internal_function
1029 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1030      bool no_leading_period, int flags)
1031 {
1032   const CHAR *startp;
1033   size_t level;
1034   struct patternlist
1035   {
1036     struct patternlist *next;
1037     CHAR str[1];
1038   } *list = NULL;
1039   struct patternlist **lastp = &list;
1040   size_t pattern_len = STRLEN (pattern);
1041   const CHAR *p;
1042   const CHAR *rs;
1043   enum { ALLOCA_LIMIT = 8000 };
1044 
1045   /* Parse the pattern.  Store the individual parts in the list.  */
1046   level = 0;
1047   for (startp = p = pattern + 1; ; ++p)
1048     if (*p == L_('\0'))
1049       /* This is an invalid pattern.  */
1050       return -1;
1051     else if (*p == L_('['))
1052       {
1053         /* Handle brackets special.  */
1054         if (posixly_correct == 0)
1055           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1056 
1057         /* Skip the not sign.  We have to recognize it because of a possibly
1058            following ']'.  */
1059         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1060           ++p;
1061         /* A leading ']' is recognized as such.  */
1062         if (*p == L_(']'))
1063           ++p;
1064         /* Skip over all characters of the list.  */
1065         while (*p != L_(']'))
1066           if (*p++ == L_('\0'))
1067             /* This is no valid pattern.  */
1068             return -1;
1069       }
1070     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1071               || *p == L_('!')) && p[1] == L_('('))
1072       /* Remember the nesting level.  */
1073       ++level;
1074     else if (*p == L_(')'))
1075       {
1076         if (level-- == 0)
1077           {
1078             /* This means we found the end of the pattern.  */
1079 #define NEW_PATTERN \
1080             struct patternlist *newp;                                         \
1081             size_t plen;                                                      \
1082             size_t plensize;                                                  \
1083             size_t newpsize;                                                  \
1084                                                                               \
1085             plen = (opt == L_('?') || opt == L_('@')                          \
1086                     ? pattern_len                                             \
1087                     : p - startp + 1UL);                                      \
1088             plensize = plen * sizeof (CHAR);                                  \
1089             newpsize = offsetof (struct patternlist, str) + plensize;         \
1090             if ((size_t) -1 / sizeof (CHAR) < plen                            \
1091                 || newpsize < offsetof (struct patternlist, str)              \
1092                 || ALLOCA_LIMIT <= newpsize)                                  \
1093               return -1;                                                      \
1094             newp = (struct patternlist *) alloca (newpsize);                  \
1095             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0');    \
1096             newp->next = NULL;                                                \
1097             *lastp = newp;                                                    \
1098             lastp = &newp->next
1099             NEW_PATTERN;
1100             break;
1101           }
1102       }
1103     else if (*p == L_('|'))
1104       {
1105         if (level == 0)
1106           {
1107             NEW_PATTERN;
1108             startp = p + 1;
1109           }
1110       }
1111   assert (list != NULL);
1112   assert (p[-1] == L_(')'));
1113 #undef NEW_PATTERN
1114 
1115   switch (opt)
1116     {
1117     case L_('*'):
1118       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1119         return 0;
1120       /* FALLTHROUGH */
1121 
1122     case L_('+'):
1123       do
1124         {
1125           for (rs = string; rs <= string_end; ++rs)
1126             /* First match the prefix with the current pattern with the
1127                current pattern.  */
1128             if (FCT (list->str, string, rs, no_leading_period,
1129                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1130                 /* This was successful.  Now match the rest with the rest
1131                    of the pattern.  */
1132                 && (FCT (p, rs, string_end,
1133                          rs == string
1134                          ? no_leading_period
1135                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1136                          flags & FNM_FILE_NAME
1137                          ? flags : flags & ~FNM_PERIOD) == 0
1138                     /* This didn't work.  Try the whole pattern.  */
1139                     || (rs != string
1140                         && FCT (pattern - 1, rs, string_end,
1141                                 rs == string
1142                                 ? no_leading_period
1143                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1144                                 flags & FNM_FILE_NAME
1145                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1146               /* It worked.  Signal success.  */
1147               return 0;
1148         }
1149       while ((list = list->next) != NULL);
1150 
1151       /* None of the patterns lead to a match.  */
1152       return FNM_NOMATCH;
1153 
1154     case L_('?'):
1155       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1156         return 0;
1157       /* FALLTHROUGH */
1158 
1159     case L_('@'):
1160       do
1161         /* I cannot believe it but `strcat' is actually acceptable
1162            here.  Match the entire string with the prefix from the
1163            pattern list and the rest of the pattern following the
1164            pattern list.  */
1165         if (FCT (STRCAT (list->str, p), string, string_end,
1166                  no_leading_period,
1167                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1168           /* It worked.  Signal success.  */
1169           return 0;
1170       while ((list = list->next) != NULL);
1171 
1172       /* None of the patterns lead to a match.  */
1173       return FNM_NOMATCH;
1174 
1175     case L_('!'):
1176       for (rs = string; rs <= string_end; ++rs)
1177         {
1178           struct patternlist *runp;
1179 
1180           for (runp = list; runp != NULL; runp = runp->next)
1181             if (FCT (runp->str, string, rs,  no_leading_period,
1182                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1183               break;
1184 
1185           /* If none of the patterns matched see whether the rest does.  */
1186           if (runp == NULL
1187               && (FCT (p, rs, string_end,
1188                        rs == string
1189                        ? no_leading_period
1190                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1191                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1192                   == 0))
1193             /* This is successful.  */
1194             return 0;
1195         }
1196 
1197       /* None of the patterns together with the rest of the pattern
1198          lead to a match.  */
1199       return FNM_NOMATCH;
1200 
1201     default:
1202       assert (! "Invalid extended matching operator");
1203       break;
1204     }
1205 
1206   return -1;
1207 }
1208 
1209 
1210 #undef FOLD
1211 #undef CHAR
1212 #undef UCHAR
1213 #undef INT
1214 #undef FCT
1215 #undef EXT
1216 #undef END
1217 #undef MEMPCPY
1218 #undef MEMCHR
1219 #undef STRCOLL
1220 #undef STRLEN
1221 #undef STRCAT
1222 #undef L_
1223 #undef BTOWC
1224