xref: /netbsd-src/external/gpl2/xcvs/dist/lib/fnmatch_loop.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005
2 	Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 2, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software Foundation,
16    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
17 
18 /* Match STRING against the file name pattern PATTERN, returning zero if
19    it matches, nonzero if not.  */
20 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
21 		const CHAR *string_end, bool no_leading_period, int flags)
22      internal_function;
23 static const CHAR *END (const CHAR *patternp) internal_function;
24 
25 static int
26 internal_function
27 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
28      bool no_leading_period, int flags)
29 {
30   register const CHAR *p = pattern, *n = string;
31   register UCHAR c;
32 #ifdef _LIBC
33 # if WIDE_CHAR_VERSION
34   const char *collseq = (const char *)
35     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
36 # else
37   const UCHAR *collseq = (const UCHAR *)
38     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
39 # endif
40 #endif
41 
42   while ((c = *p++) != L('\0'))
43     {
44       bool new_no_leading_period = false;
45       c = FOLD (c);
46 
47       switch (c)
48 	{
49 	case L('?'):
50 	  if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
51 	    {
52 	      int res;
53 
54 	      res = EXT (c, p, n, string_end, no_leading_period,
55 			 flags);
56 	      if (res != -1)
57 		return res;
58 	    }
59 
60 	  if (n == string_end)
61 	    return FNM_NOMATCH;
62 	  else if (*n == L('/') && (flags & FNM_FILE_NAME))
63 	    return FNM_NOMATCH;
64 	  else if (*n == L('.') && no_leading_period)
65 	    return FNM_NOMATCH;
66 	  break;
67 
68 	case L('\\'):
69 	  if (!(flags & FNM_NOESCAPE))
70 	    {
71 	      c = *p++;
72 	      if (c == L('\0'))
73 		/* Trailing \ loses.  */
74 		return FNM_NOMATCH;
75 	      c = FOLD (c);
76 	    }
77 	  if (n == string_end || FOLD ((UCHAR) *n) != c)
78 	    return FNM_NOMATCH;
79 	  break;
80 
81 	case L('*'):
82 	  if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
83 	    {
84 	      int res;
85 
86 	      res = EXT (c, p, n, string_end, no_leading_period,
87 			 flags);
88 	      if (res != -1)
89 		return res;
90 	    }
91 
92 	  if (n != string_end && *n == L('.') && no_leading_period)
93 	    return FNM_NOMATCH;
94 
95 	  for (c = *p++; c == L('?') || c == L('*'); c = *p++)
96 	    {
97 	      if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
98 		{
99 		  const CHAR *endp = END (p);
100 		  if (endp != p)
101 		    {
102 		      /* This is a pattern.  Skip over it.  */
103 		      p = endp;
104 		      continue;
105 		    }
106 		}
107 
108 	      if (c == L('?'))
109 		{
110 		  /* A ? needs to match one character.  */
111 		  if (n == string_end)
112 		    /* There isn't another character; no match.  */
113 		    return FNM_NOMATCH;
114 		  else if (*n == L('/')
115 			   && __builtin_expect (flags & FNM_FILE_NAME, 0))
116 		    /* A slash does not match a wildcard under
117 		       FNM_FILE_NAME.  */
118 		    return FNM_NOMATCH;
119 		  else
120 		    /* One character of the string is consumed in matching
121 		       this ? wildcard, so *??? won't match if there are
122 		       less than three characters.  */
123 		    ++n;
124 		}
125 	    }
126 
127 	  if (c == L('\0'))
128 	    /* The wildcard(s) is/are the last element of the pattern.
129 	       If the name is a file name and contains another slash
130 	       this means it cannot match, unless the FNM_LEADING_DIR
131 	       flag is set.  */
132 	    {
133 	      int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
134 
135 	      if (flags & FNM_FILE_NAME)
136 		{
137 		  if (flags & FNM_LEADING_DIR)
138 		    result = 0;
139 		  else
140 		    {
141 		      if (MEMCHR (n, L('/'), string_end - n) == NULL)
142 			result = 0;
143 		    }
144 		}
145 
146 	      return result;
147 	    }
148 	  else
149 	    {
150 	      const CHAR *endp;
151 
152 	      endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
153 			     string_end - n);
154 	      if (endp == NULL)
155 		endp = string_end;
156 
157 	      if (c == L('[')
158 		  || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
159 		      && (c == L('@') || c == L('+') || c == L('!'))
160 		      && *p == L('(')))
161 		{
162 		  int flags2 = ((flags & FNM_FILE_NAME)
163 				? flags : (flags & ~FNM_PERIOD));
164 		  bool no_leading_period2 = no_leading_period;
165 
166 		  for (--p; n < endp; ++n, no_leading_period2 = false)
167 		    if (FCT (p, n, string_end, no_leading_period2, flags2)
168 			== 0)
169 		      return 0;
170 		}
171 	      else if (c == L('/') && (flags & FNM_FILE_NAME))
172 		{
173 		  while (n < string_end && *n != L('/'))
174 		    ++n;
175 		  if (n < string_end && *n == L('/')
176 		      && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
177 			  == 0))
178 		    return 0;
179 		}
180 	      else
181 		{
182 		  int flags2 = ((flags & FNM_FILE_NAME)
183 				? flags : (flags & ~FNM_PERIOD));
184 		  int no_leading_period2 = no_leading_period;
185 
186 		  if (c == L('\\') && !(flags & FNM_NOESCAPE))
187 		    c = *p;
188 		  c = FOLD (c);
189 		  for (--p; n < endp; ++n, no_leading_period2 = false)
190 		    if (FOLD ((UCHAR) *n) == c
191 			&& (FCT (p, n, string_end, no_leading_period2, flags2)
192 			    == 0))
193 		      return 0;
194 		}
195 	    }
196 
197 	  /* If we come here no match is possible with the wildcard.  */
198 	  return FNM_NOMATCH;
199 
200 	case L('['):
201 	  {
202 	    /* Nonzero if the sense of the character class is inverted.  */
203 	    register bool not;
204 	    CHAR cold;
205 	    UCHAR fn;
206 
207 	    if (posixly_correct == 0)
208 	      posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
209 
210 	    if (n == string_end)
211 	      return FNM_NOMATCH;
212 
213 	    if (*n == L('.') && no_leading_period)
214 	      return FNM_NOMATCH;
215 
216 	    if (*n == L('/') && (flags & FNM_FILE_NAME))
217 	      /* `/' cannot be matched.  */
218 	      return FNM_NOMATCH;
219 
220 	    not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
221 	    if (not)
222 	      ++p;
223 
224 	    fn = FOLD ((UCHAR) *n);
225 
226 	    c = *p++;
227 	    for (;;)
228 	      {
229 		if (!(flags & FNM_NOESCAPE) && c == L('\\'))
230 		  {
231 		    if (*p == L('\0'))
232 		      return FNM_NOMATCH;
233 		    c = FOLD ((UCHAR) *p);
234 		    ++p;
235 
236 		    if (c == fn)
237 		      goto matched;
238 		  }
239 		else if (c == L('[') && *p == L(':'))
240 		  {
241 		    /* Leave room for the null.  */
242 		    CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
243 		    size_t c1 = 0;
244 #if defined _LIBC || WIDE_CHAR_SUPPORT
245 		    wctype_t wt;
246 #endif
247 		    const CHAR *startp = p;
248 
249 		    for (;;)
250 		      {
251 			if (c1 == CHAR_CLASS_MAX_LENGTH)
252 			  /* The name is too long and therefore the pattern
253 			     is ill-formed.  */
254 			  return FNM_NOMATCH;
255 
256 			c = *++p;
257 			if (c == L(':') && p[1] == L(']'))
258 			  {
259 			    p += 2;
260 			    break;
261 			  }
262 			if (c < L('a') || c >= L('z'))
263 			  {
264 			    /* This cannot possibly be a character class name.
265 			       Match it as a normal range.  */
266 			    p = startp;
267 			    c = L('[');
268 			    goto normal_bracket;
269 			  }
270 			str[c1++] = c;
271 		      }
272 		    str[c1] = L('\0');
273 
274 #if defined _LIBC || WIDE_CHAR_SUPPORT
275 		    wt = IS_CHAR_CLASS (str);
276 		    if (wt == 0)
277 		      /* Invalid character class name.  */
278 		      return FNM_NOMATCH;
279 
280 # if defined _LIBC && ! WIDE_CHAR_VERSION
281 		    /* The following code is glibc specific but does
282 		       there a good job in speeding up the code since
283 		       we can avoid the btowc() call.  */
284 		    if (_ISCTYPE ((UCHAR) *n, wt))
285 		      goto matched;
286 # else
287 		    if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
288 		      goto matched;
289 # endif
290 #else
291 		    if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
292 			|| (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
293 			|| (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
294 			|| (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
295 			|| (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
296 			|| (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
297 			|| (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
298 			|| (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
299 			|| (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
300 			|| (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
301 			|| (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
302 			|| (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
303 		      goto matched;
304 #endif
305 		    c = *p++;
306 		  }
307 #ifdef _LIBC
308 		else if (c == L('[') && *p == L('='))
309 		  {
310 		    UCHAR str[1];
311 		    uint32_t nrules =
312 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
313 		    const CHAR *startp = p;
314 
315 		    c = *++p;
316 		    if (c == L('\0'))
317 		      {
318 			p = startp;
319 			c = L('[');
320 			goto normal_bracket;
321 		      }
322 		    str[0] = c;
323 
324 		    c = *++p;
325 		    if (c != L('=') || p[1] != L(']'))
326 		      {
327 			p = startp;
328 			c = L('[');
329 			goto normal_bracket;
330 		      }
331 		    p += 2;
332 
333 		    if (nrules == 0)
334 		      {
335 			if ((UCHAR) *n == str[0])
336 			  goto matched;
337 		      }
338 		    else
339 		      {
340 			const int32_t *table;
341 # if WIDE_CHAR_VERSION
342 			const int32_t *weights;
343 			const int32_t *extra;
344 # else
345 			const unsigned char *weights;
346 			const unsigned char *extra;
347 # endif
348 			const int32_t *indirect;
349 			int32_t idx;
350 			const UCHAR *cp = (const UCHAR *) str;
351 
352 			/* This #include defines a local function!  */
353 # if WIDE_CHAR_VERSION
354 #  include <locale/weightwc.h>
355 # else
356 #  include <locale/weight.h>
357 # endif
358 
359 # if WIDE_CHAR_VERSION
360 			table = (const int32_t *)
361 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
362 			weights = (const int32_t *)
363 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
364 			extra = (const int32_t *)
365 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
366 			indirect = (const int32_t *)
367 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
368 # else
369 			table = (const int32_t *)
370 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371 			weights = (const unsigned char *)
372 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
373 			extra = (const unsigned char *)
374 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
375 			indirect = (const int32_t *)
376 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
377 # endif
378 
379 			idx = findidx (&cp);
380 			if (idx != 0)
381 			  {
382 			    /* We found a table entry.  Now see whether the
383 			       character we are currently at has the same
384 			       equivalance class value.  */
385 			    int len = weights[idx];
386 			    int32_t idx2;
387 			    const UCHAR *np = (const UCHAR *) n;
388 
389 			    idx2 = findidx (&np);
390 			    if (idx2 != 0 && len == weights[idx2])
391 			      {
392 				int cnt = 0;
393 
394 				while (cnt < len
395 				       && (weights[idx + 1 + cnt]
396 					   == weights[idx2 + 1 + cnt]))
397 				  ++cnt;
398 
399 				if (cnt == len)
400 				  goto matched;
401 			      }
402 			  }
403 		      }
404 
405 		    c = *p++;
406 		  }
407 #endif
408 		else if (c == L('\0'))
409 		  /* [ (unterminated) loses.  */
410 		  return FNM_NOMATCH;
411 		else
412 		  {
413 		    bool is_range = false;
414 
415 #ifdef _LIBC
416 		    bool is_seqval = false;
417 
418 		    if (c == L('[') && *p == L('.'))
419 		      {
420 			uint32_t nrules =
421 			  _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
422 			const CHAR *startp = p;
423 			size_t c1 = 0;
424 
425 			while (1)
426 			  {
427 			    c = *++p;
428 			    if (c == L('.') && p[1] == L(']'))
429 			      {
430 				p += 2;
431 				break;
432 			      }
433 			    if (c == '\0')
434 			      return FNM_NOMATCH;
435 			    ++c1;
436 			  }
437 
438 			/* We have to handling the symbols differently in
439 			   ranges since then the collation sequence is
440 			   important.  */
441 			is_range = *p == L('-') && p[1] != L('\0');
442 
443 			if (nrules == 0)
444 			  {
445 			    /* There are no names defined in the collation
446 			       data.  Therefore we only accept the trivial
447 			       names consisting of the character itself.  */
448 			    if (c1 != 1)
449 			      return FNM_NOMATCH;
450 
451 			    if (!is_range && *n == startp[1])
452 			      goto matched;
453 
454 			    cold = startp[1];
455 			    c = *p++;
456 			  }
457 			else
458 			  {
459 			    int32_t table_size;
460 			    const int32_t *symb_table;
461 # ifdef WIDE_CHAR_VERSION
462 			    char str[c1];
463 			    size_t strcnt;
464 # else
465 #  define str (startp + 1)
466 # endif
467 			    const unsigned char *extra;
468 			    int32_t idx;
469 			    int32_t elem;
470 			    int32_t second;
471 			    int32_t hash;
472 
473 # ifdef WIDE_CHAR_VERSION
474 			    /* We have to convert the name to a single-byte
475 			       string.  This is possible since the names
476 			       consist of ASCII characters and the internal
477 			       representation is UCS4.  */
478 			    for (strcnt = 0; strcnt < c1; ++strcnt)
479 			      str[strcnt] = startp[1 + strcnt];
480 # endif
481 
482 			    table_size =
483 			      _NL_CURRENT_WORD (LC_COLLATE,
484 						_NL_COLLATE_SYMB_HASH_SIZEMB);
485 			    symb_table = (const int32_t *)
486 			      _NL_CURRENT (LC_COLLATE,
487 					   _NL_COLLATE_SYMB_TABLEMB);
488 			    extra = (const unsigned char *)
489 			      _NL_CURRENT (LC_COLLATE,
490 					   _NL_COLLATE_SYMB_EXTRAMB);
491 
492 			    /* Locate the character in the hashing table.  */
493 			    hash = elem_hash (str, c1);
494 
495 			    idx = 0;
496 			    elem = hash % table_size;
497 			    second = hash % (table_size - 2);
498 			    while (symb_table[2 * elem] != 0)
499 			      {
500 				/* First compare the hashing value.  */
501 				if (symb_table[2 * elem] == hash
502 				    && c1 == extra[symb_table[2 * elem + 1]]
503 				    && memcmp (str,
504 					       &extra[symb_table[2 * elem + 1]
505 						     + 1], c1) == 0)
506 				  {
507 				    /* Yep, this is the entry.  */
508 				    idx = symb_table[2 * elem + 1];
509 				    idx += 1 + extra[idx];
510 				    break;
511 				  }
512 
513 				/* Next entry.  */
514 				elem += second;
515 			      }
516 
517 			    if (symb_table[2 * elem] != 0)
518 			      {
519 				/* Compare the byte sequence but only if
520 				   this is not part of a range.  */
521 # ifdef WIDE_CHAR_VERSION
522 				int32_t *wextra;
523 
524 				idx += 1 + extra[idx];
525 				/* Adjust for the alignment.  */
526 				idx = (idx + 3) & ~3;
527 
528 				wextra = (int32_t *) &extra[idx + 4];
529 # endif
530 
531 				if (! is_range)
532 				  {
533 # ifdef WIDE_CHAR_VERSION
534 				    for (c1 = 0;
535 					 (int32_t) c1 < wextra[idx];
536 					 ++c1)
537 				      if (n[c1] != wextra[1 + c1])
538 					break;
539 
540 				    if ((int32_t) c1 == wextra[idx])
541 				      goto matched;
542 # else
543 				    for (c1 = 0; c1 < extra[idx]; ++c1)
544 				      if (n[c1] != extra[1 + c1])
545 					break;
546 
547 				    if (c1 == extra[idx])
548 				      goto matched;
549 # endif
550 				  }
551 
552 				/* Get the collation sequence value.  */
553 				is_seqval = true;
554 # ifdef WIDE_CHAR_VERSION
555 				cold = wextra[1 + wextra[idx]];
556 # else
557 				/* Adjust for the alignment.  */
558 				idx += 1 + extra[idx];
559 				idx = (idx + 3) & ~4;
560 				cold = *((int32_t *) &extra[idx]);
561 # endif
562 
563 				c = *p++;
564 			      }
565 			    else if (c1 == 1)
566 			      {
567 				/* No valid character.  Match it as a
568 				   single byte.  */
569 				if (!is_range && *n == str[0])
570 				  goto matched;
571 
572 				cold = str[0];
573 				c = *p++;
574 			      }
575 			    else
576 			      return FNM_NOMATCH;
577 			  }
578 		      }
579 		    else
580 # undef str
581 #endif
582 		      {
583 			c = FOLD (c);
584 		      normal_bracket:
585 
586 			/* We have to handling the symbols differently in
587 			   ranges since then the collation sequence is
588 			   important.  */
589 			is_range = (*p == L('-') && p[1] != L('\0')
590 				    && p[1] != L(']'));
591 
592 			if (!is_range && c == fn)
593 			  goto matched;
594 
595 			cold = c;
596 			c = *p++;
597 		      }
598 
599 		    if (c == L('-') && *p != L(']'))
600 		      {
601 #if _LIBC
602 			/* We have to find the collation sequence
603 			   value for C.  Collation sequence is nothing
604 			   we can regularly access.  The sequence
605 			   value is defined by the order in which the
606 			   definitions of the collation values for the
607 			   various characters appear in the source
608 			   file.  A strange concept, nowhere
609 			   documented.  */
610 			uint32_t fcollseq;
611 			uint32_t lcollseq;
612 			UCHAR cend = *p++;
613 
614 # ifdef WIDE_CHAR_VERSION
615 			/* Search in the `names' array for the characters.  */
616 			fcollseq = __collseq_table_lookup (collseq, fn);
617 			if (fcollseq == ~((uint32_t) 0))
618 			  /* XXX We don't know anything about the character
619 			     we are supposed to match.  This means we are
620 			     failing.  */
621 			  goto range_not_matched;
622 
623 			if (is_seqval)
624 			  lcollseq = cold;
625 			else
626 			  lcollseq = __collseq_table_lookup (collseq, cold);
627 # else
628 			fcollseq = collseq[fn];
629 			lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
630 # endif
631 
632 			is_seqval = false;
633 			if (cend == L('[') && *p == L('.'))
634 			  {
635 			    uint32_t nrules =
636 			      _NL_CURRENT_WORD (LC_COLLATE,
637 						_NL_COLLATE_NRULES);
638 			    const CHAR *startp = p;
639 			    size_t c1 = 0;
640 
641 			    while (1)
642 			      {
643 				c = *++p;
644 				if (c == L('.') && p[1] == L(']'))
645 				  {
646 				    p += 2;
647 				    break;
648 				  }
649 				if (c == '\0')
650 				  return FNM_NOMATCH;
651 				++c1;
652 			      }
653 
654 			    if (nrules == 0)
655 			      {
656 				/* There are no names defined in the
657 				   collation data.  Therefore we only
658 				   accept the trivial names consisting
659 				   of the character itself.  */
660 				if (c1 != 1)
661 				  return FNM_NOMATCH;
662 
663 				cend = startp[1];
664 			      }
665 			    else
666 			      {
667 				int32_t table_size;
668 				const int32_t *symb_table;
669 # ifdef WIDE_CHAR_VERSION
670 				char str[c1];
671 				size_t strcnt;
672 # else
673 #  define str (startp + 1)
674 # endif
675 				const unsigned char *extra;
676 				int32_t idx;
677 				int32_t elem;
678 				int32_t second;
679 				int32_t hash;
680 
681 # ifdef WIDE_CHAR_VERSION
682 				/* We have to convert the name to a single-byte
683 				   string.  This is possible since the names
684 				   consist of ASCII characters and the internal
685 				   representation is UCS4.  */
686 				for (strcnt = 0; strcnt < c1; ++strcnt)
687 				  str[strcnt] = startp[1 + strcnt];
688 # endif
689 
690 				table_size =
691 				  _NL_CURRENT_WORD (LC_COLLATE,
692 						    _NL_COLLATE_SYMB_HASH_SIZEMB);
693 				symb_table = (const int32_t *)
694 				  _NL_CURRENT (LC_COLLATE,
695 					       _NL_COLLATE_SYMB_TABLEMB);
696 				extra = (const unsigned char *)
697 				  _NL_CURRENT (LC_COLLATE,
698 					       _NL_COLLATE_SYMB_EXTRAMB);
699 
700 				/* Locate the character in the hashing
701                                    table.  */
702 				hash = elem_hash (str, c1);
703 
704 				idx = 0;
705 				elem = hash % table_size;
706 				second = hash % (table_size - 2);
707 				while (symb_table[2 * elem] != 0)
708 				  {
709 				/* First compare the hashing value.  */
710 				    if (symb_table[2 * elem] == hash
711 					&& (c1
712 					    == extra[symb_table[2 * elem + 1]])
713 					&& memcmp (str,
714 						   &extra[symb_table[2 * elem + 1]
715 							 + 1], c1) == 0)
716 				      {
717 					/* Yep, this is the entry.  */
718 					idx = symb_table[2 * elem + 1];
719 					idx += 1 + extra[idx];
720 					break;
721 				      }
722 
723 				    /* Next entry.  */
724 				    elem += second;
725 				  }
726 
727 				if (symb_table[2 * elem] != 0)
728 				  {
729 				    /* Compare the byte sequence but only if
730 				       this is not part of a range.  */
731 # ifdef WIDE_CHAR_VERSION
732 				    int32_t *wextra;
733 
734 				    idx += 1 + extra[idx];
735 				    /* Adjust for the alignment.  */
736 				    idx = (idx + 3) & ~4;
737 
738 				    wextra = (int32_t *) &extra[idx + 4];
739 # endif
740 				    /* Get the collation sequence value.  */
741 				    is_seqval = true;
742 # ifdef WIDE_CHAR_VERSION
743 				    cend = wextra[1 + wextra[idx]];
744 # else
745 				    /* Adjust for the alignment.  */
746 				    idx += 1 + extra[idx];
747 				    idx = (idx + 3) & ~4;
748 				    cend = *((int32_t *) &extra[idx]);
749 # endif
750 				  }
751 				else if (symb_table[2 * elem] != 0 && c1 == 1)
752 				  {
753 				    cend = str[0];
754 				    c = *p++;
755 				  }
756 				else
757 				  return FNM_NOMATCH;
758 			      }
759 # undef str
760 			  }
761 			else
762 			  {
763 			    if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
764 			      cend = *p++;
765 			    if (cend == L('\0'))
766 			      return FNM_NOMATCH;
767 			    cend = FOLD (cend);
768 			  }
769 
770 			/* XXX It is not entirely clear to me how to handle
771 			   characters which are not mentioned in the
772 			   collation specification.  */
773 			if (
774 # ifdef WIDE_CHAR_VERSION
775 			    lcollseq == 0xffffffff ||
776 # endif
777 			    lcollseq <= fcollseq)
778 			  {
779 			    /* We have to look at the upper bound.  */
780 			    uint32_t hcollseq;
781 
782 			    if (is_seqval)
783 			      hcollseq = cend;
784 			    else
785 			      {
786 # ifdef WIDE_CHAR_VERSION
787 				hcollseq =
788 				  __collseq_table_lookup (collseq, cend);
789 				if (hcollseq == ~((uint32_t) 0))
790 				  {
791 				    /* Hum, no information about the upper
792 				       bound.  The matching succeeds if the
793 				       lower bound is matched exactly.  */
794 				    if (lcollseq != fcollseq)
795 				      goto range_not_matched;
796 
797 				    goto matched;
798 				  }
799 # else
800 				hcollseq = collseq[cend];
801 # endif
802 			      }
803 
804 			    if (lcollseq <= hcollseq && fcollseq <= hcollseq)
805 			      goto matched;
806 			  }
807 # ifdef WIDE_CHAR_VERSION
808 		      range_not_matched:
809 # endif
810 #else
811 			/* We use a boring value comparison of the character
812 			   values.  This is better than comparing using
813 			   `strcoll' since the latter would have surprising
814 			   and sometimes fatal consequences.  */
815 			UCHAR cend = *p++;
816 
817 			if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
818 			  cend = *p++;
819 			if (cend == L('\0'))
820 			  return FNM_NOMATCH;
821 
822 			/* It is a range.  */
823 			if (cold <= fn && fn <= cend)
824 			  goto matched;
825 #endif
826 
827 			c = *p++;
828 		      }
829 		  }
830 
831 		if (c == L(']'))
832 		  break;
833 	      }
834 
835 	    if (!not)
836 	      return FNM_NOMATCH;
837 	    break;
838 
839 	  matched:
840 	    /* Skip the rest of the [...] that already matched.  */
841 	    do
842 	      {
843 	      ignore_next:
844 		c = *p++;
845 
846 		if (c == L('\0'))
847 		  /* [... (unterminated) loses.  */
848 		  return FNM_NOMATCH;
849 
850 		if (!(flags & FNM_NOESCAPE) && c == L('\\'))
851 		  {
852 		    if (*p == L('\0'))
853 		      return FNM_NOMATCH;
854 		    /* XXX 1003.2d11 is unclear if this is right.  */
855 		    ++p;
856 		  }
857 		else if (c == L('[') && *p == L(':'))
858 		  {
859 		    int c1 = 0;
860 		    const CHAR *startp = p;
861 
862 		    while (1)
863 		      {
864 			c = *++p;
865 			if (++c1 == CHAR_CLASS_MAX_LENGTH)
866 			  return FNM_NOMATCH;
867 
868 			if (*p == L(':') && p[1] == L(']'))
869 			  break;
870 
871 			if (c < L('a') || c >= L('z'))
872 			  {
873 			    p = startp;
874 			    goto ignore_next;
875 			  }
876 		      }
877 		    p += 2;
878 		    c = *p++;
879 		  }
880 		else if (c == L('[') && *p == L('='))
881 		  {
882 		    c = *++p;
883 		    if (c == L('\0'))
884 		      return FNM_NOMATCH;
885 		    c = *++p;
886 		    if (c != L('=') || p[1] != L(']'))
887 		      return FNM_NOMATCH;
888 		    p += 2;
889 		    c = *p++;
890 		  }
891 		else if (c == L('[') && *p == L('.'))
892 		  {
893 		    ++p;
894 		    while (1)
895 		      {
896 			c = *++p;
897 			if (c == '\0')
898 			  return FNM_NOMATCH;
899 
900 			if (*p == L('.') && p[1] == L(']'))
901 			  break;
902 		      }
903 		    p += 2;
904 		    c = *p++;
905 		  }
906 	      }
907 	    while (c != L(']'));
908 	    if (not)
909 	      return FNM_NOMATCH;
910 	  }
911 	  break;
912 
913 	case L('+'):
914 	case L('@'):
915 	case L('!'):
916 	  if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
917 	    {
918 	      int res;
919 
920 	      res = EXT (c, p, n, string_end, no_leading_period, flags);
921 	      if (res != -1)
922 		return res;
923 	    }
924 	  goto normal_match;
925 
926 	case L('/'):
927 	  if (NO_LEADING_PERIOD (flags))
928 	    {
929 	      if (n == string_end || c != (UCHAR) *n)
930 		return FNM_NOMATCH;
931 
932 	      new_no_leading_period = true;
933 	      break;
934 	    }
935 	  /* FALLTHROUGH */
936 	default:
937 	normal_match:
938 	  if (n == string_end || c != FOLD ((UCHAR) *n))
939 	    return FNM_NOMATCH;
940 	}
941 
942       no_leading_period = new_no_leading_period;
943       ++n;
944     }
945 
946   if (n == string_end)
947     return 0;
948 
949   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
950     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
951     return 0;
952 
953   return FNM_NOMATCH;
954 }
955 
956 
957 static const CHAR *
958 internal_function
959 END (const CHAR *pattern)
960 {
961   const CHAR *p = pattern;
962 
963   while (1)
964     if (*++p == L('\0'))
965       /* This is an invalid pattern.  */
966       return pattern;
967     else if (*p == L('['))
968       {
969 	/* Handle brackets special.  */
970 	if (posixly_correct == 0)
971 	  posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
972 
973 	/* Skip the not sign.  We have to recognize it because of a possibly
974 	   following ']'.  */
975 	if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
976 	  ++p;
977 	/* A leading ']' is recognized as such.  */
978 	if (*p == L(']'))
979 	  ++p;
980 	/* Skip over all characters of the list.  */
981 	while (*p != L(']'))
982 	  if (*p++ == L('\0'))
983 	    /* This is no valid pattern.  */
984 	    return pattern;
985       }
986     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
987 	      || *p == L('!')) && p[1] == L('('))
988       p = END (p + 1);
989     else if (*p == L(')'))
990       break;
991 
992   return p + 1;
993 }
994 
995 
996 static int
997 internal_function
998 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
999      bool no_leading_period, int flags)
1000 {
1001   const CHAR *startp;
1002   size_t level;
1003   struct patternlist
1004   {
1005     struct patternlist *next;
1006     CHAR str[1];
1007   } *list = NULL;
1008   struct patternlist **lastp = &list;
1009   size_t pattern_len = STRLEN (pattern);
1010   const CHAR *p;
1011   const CHAR *rs;
1012   enum { ALLOCA_LIMIT = 8000 };
1013 
1014   /* Parse the pattern.  Store the individual parts in the list.  */
1015   level = 0;
1016   for (startp = p = pattern + 1; ; ++p)
1017     if (*p == L('\0'))
1018       /* This is an invalid pattern.  */
1019       return -1;
1020     else if (*p == L('['))
1021       {
1022 	/* Handle brackets special.  */
1023 	if (posixly_correct == 0)
1024 	  posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1025 
1026 	/* Skip the not sign.  We have to recognize it because of a possibly
1027 	   following ']'.  */
1028 	if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1029 	  ++p;
1030 	/* A leading ']' is recognized as such.  */
1031 	if (*p == L(']'))
1032 	  ++p;
1033 	/* Skip over all characters of the list.  */
1034 	while (*p != L(']'))
1035 	  if (*p++ == L('\0'))
1036 	    /* This is no valid pattern.  */
1037 	    return -1;
1038       }
1039     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1040 	      || *p == L('!')) && p[1] == L('('))
1041       /* Remember the nesting level.  */
1042       ++level;
1043     else if (*p == L(')'))
1044       {
1045 	if (level-- == 0)
1046 	  {
1047 	    /* This means we found the end of the pattern.  */
1048 #define NEW_PATTERN \
1049 	    struct patternlist *newp;					      \
1050 	    size_t plen;						      \
1051 	    size_t plensize;						      \
1052 	    size_t newpsize;						      \
1053 									      \
1054 	    plen = (opt == L('?') || opt == L('@')			      \
1055 		    ? pattern_len					      \
1056 		    : p - startp + 1);					      \
1057 	    plensize = plen * sizeof (CHAR);				      \
1058 	    newpsize = offsetof (struct patternlist, str) + plensize;	      \
1059 	    if ((size_t) -1 / sizeof (CHAR) < plen			      \
1060 		|| newpsize < offsetof (struct patternlist, str)	      \
1061 		|| ALLOCA_LIMIT <= newpsize)				      \
1062 	      return -1;						      \
1063 	    newp = (struct patternlist *) alloca (newpsize);		      \
1064 	    *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1065 	    newp->next = NULL;						      \
1066 	    *lastp = newp;						      \
1067 	    lastp = &newp->next
1068 	    NEW_PATTERN;
1069 	    break;
1070 	  }
1071       }
1072     else if (*p == L('|'))
1073       {
1074 	if (level == 0)
1075 	  {
1076 	    NEW_PATTERN;
1077 	    startp = p + 1;
1078 	  }
1079       }
1080   assert (list != NULL);
1081   assert (p[-1] == L(')'));
1082 #undef NEW_PATTERN
1083 
1084   switch (opt)
1085     {
1086     case L('*'):
1087       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1088 	return 0;
1089       /* FALLTHROUGH */
1090 
1091     case L('+'):
1092       do
1093 	{
1094 	  for (rs = string; rs <= string_end; ++rs)
1095 	    /* First match the prefix with the current pattern with the
1096 	       current pattern.  */
1097 	    if (FCT (list->str, string, rs, no_leading_period,
1098 		     flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1099 		/* This was successful.  Now match the rest with the rest
1100 		   of the pattern.  */
1101 		&& (FCT (p, rs, string_end,
1102 			 rs == string
1103 			 ? no_leading_period
1104 			 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1105 			 flags & FNM_FILE_NAME
1106 			 ? flags : flags & ~FNM_PERIOD) == 0
1107 		    /* This didn't work.  Try the whole pattern.  */
1108 		    || (rs != string
1109 			&& FCT (pattern - 1, rs, string_end,
1110 				rs == string
1111 				? no_leading_period
1112 				: rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1113 				flags & FNM_FILE_NAME
1114 				? flags : flags & ~FNM_PERIOD) == 0)))
1115 	      /* It worked.  Signal success.  */
1116 	      return 0;
1117 	}
1118       while ((list = list->next) != NULL);
1119 
1120       /* None of the patterns lead to a match.  */
1121       return FNM_NOMATCH;
1122 
1123     case L('?'):
1124       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1125 	return 0;
1126       /* FALLTHROUGH */
1127 
1128     case L('@'):
1129       do
1130 	/* I cannot believe it but `strcat' is actually acceptable
1131 	   here.  Match the entire string with the prefix from the
1132 	   pattern list and the rest of the pattern following the
1133 	   pattern list.  */
1134 	if (FCT (STRCAT (list->str, p), string, string_end,
1135 		 no_leading_period,
1136 		 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1137 	  /* It worked.  Signal success.  */
1138 	  return 0;
1139       while ((list = list->next) != NULL);
1140 
1141       /* None of the patterns lead to a match.  */
1142       return FNM_NOMATCH;
1143 
1144     case L('!'):
1145       for (rs = string; rs <= string_end; ++rs)
1146 	{
1147 	  struct patternlist *runp;
1148 
1149 	  for (runp = list; runp != NULL; runp = runp->next)
1150 	    if (FCT (runp->str, string, rs,  no_leading_period,
1151 		     flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1152 	      break;
1153 
1154 	  /* If none of the patterns matched see whether the rest does.  */
1155 	  if (runp == NULL
1156 	      && (FCT (p, rs, string_end,
1157 		       rs == string
1158 		       ? no_leading_period
1159 		       : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1160 		       flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1161 		  == 0))
1162 	    /* This is successful.  */
1163 	    return 0;
1164 	}
1165 
1166       /* None of the patterns together with the rest of the pattern
1167 	 lead to a match.  */
1168       return FNM_NOMATCH;
1169 
1170     default:
1171       assert (! "Invalid extended matching operator");
1172       break;
1173     }
1174 
1175   return -1;
1176 }
1177 
1178 
1179 #undef FOLD
1180 #undef CHAR
1181 #undef UCHAR
1182 #undef INT
1183 #undef FCT
1184 #undef EXT
1185 #undef END
1186 #undef MEMPCPY
1187 #undef MEMCHR
1188 #undef STRCOLL
1189 #undef STRLEN
1190 #undef STRCAT
1191 #undef L
1192 #undef BTOWC
1193