xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/gnulib-lib/fnmatch_loop.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006
2 	Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 2, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software Foundation,
16    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
17 
18 /* Match STRING against the file name pattern PATTERN, returning zero if
19    it matches, nonzero if not.  */
20 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
21 		const CHAR *string_end, bool no_leading_period, int flags)
22      internal_function;
23 static const CHAR *END (const CHAR *patternp) internal_function;
24 
25 static int
26 internal_function
FCT(const CHAR * pattern,const CHAR * string,const CHAR * string_end,bool no_leading_period,int flags)27 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
28      bool no_leading_period, int flags)
29 {
30   register const CHAR *p = pattern, *n = string;
31   register UCHAR c;
32 #ifdef _LIBC
33 # if WIDE_CHAR_VERSION
34   const char *collseq = (const char *)
35     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
36 # else
37   const UCHAR *collseq = (const UCHAR *)
38     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
39 # endif
40 #endif
41 
42   while ((c = *p++) != L_('\0'))
43     {
44       bool new_no_leading_period = false;
45       c = FOLD (c);
46 
47       switch (c)
48 	{
49 	case L_('?'):
50 	  if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
51 	    {
52 	      int res;
53 
54 	      res = EXT (c, p, n, string_end, no_leading_period,
55 			 flags);
56 	      if (res != -1)
57 		return res;
58 	    }
59 
60 	  if (n == string_end)
61 	    return FNM_NOMATCH;
62 	  else if (*n == L_('/') && (flags & FNM_FILE_NAME))
63 	    return FNM_NOMATCH;
64 	  else if (*n == L_('.') && no_leading_period)
65 	    return FNM_NOMATCH;
66 	  break;
67 
68 	case L_('\\'):
69 	  if (!(flags & FNM_NOESCAPE))
70 	    {
71 	      c = *p++;
72 	      if (c == L_('\0'))
73 		/* Trailing \ loses.  */
74 		return FNM_NOMATCH;
75 	      c = FOLD (c);
76 	    }
77 	  if (n == string_end || FOLD ((UCHAR) *n) != c)
78 	    return FNM_NOMATCH;
79 	  break;
80 
81 	case L_('*'):
82 	  if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
83 	    {
84 	      int res;
85 
86 	      res = EXT (c, p, n, string_end, no_leading_period,
87 			 flags);
88 	      if (res != -1)
89 		return res;
90 	    }
91 
92 	  if (n != string_end && *n == L_('.') && no_leading_period)
93 	    return FNM_NOMATCH;
94 
95 	  for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
96 	    {
97 	      if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
98 		{
99 		  const CHAR *endp = END (p);
100 		  if (endp != p)
101 		    {
102 		      /* This is a pattern.  Skip over it.  */
103 		      p = endp;
104 		      continue;
105 		    }
106 		}
107 
108 	      if (c == L_('?'))
109 		{
110 		  /* A ? needs to match one character.  */
111 		  if (n == string_end)
112 		    /* There isn't another character; no match.  */
113 		    return FNM_NOMATCH;
114 		  else if (*n == L_('/')
115 			   && __builtin_expect (flags & FNM_FILE_NAME, 0))
116 		    /* A slash does not match a wildcard under
117 		       FNM_FILE_NAME.  */
118 		    return FNM_NOMATCH;
119 		  else
120 		    /* One character of the string is consumed in matching
121 		       this ? wildcard, so *??? won't match if there are
122 		       less than three characters.  */
123 		    ++n;
124 		}
125 	    }
126 
127 	  if (c == L_('\0'))
128 	    /* The wildcard(s) is/are the last element of the pattern.
129 	       If the name is a file name and contains another slash
130 	       this means it cannot match, unless the FNM_LEADING_DIR
131 	       flag is set.  */
132 	    {
133 	      int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
134 
135 	      if (flags & FNM_FILE_NAME)
136 		{
137 		  if (flags & FNM_LEADING_DIR)
138 		    result = 0;
139 		  else
140 		    {
141 		      if (MEMCHR (n, L_('/'), string_end - n) == NULL)
142 			result = 0;
143 		    }
144 		}
145 
146 	      return result;
147 	    }
148 	  else
149 	    {
150 	      const CHAR *endp;
151 
152 	      endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
153 			     string_end - n);
154 	      if (endp == NULL)
155 		endp = string_end;
156 
157 	      if (c == L_('[')
158 		  || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
159 		      && (c == L_('@') || c == L_('+') || c == L_('!'))
160 		      && *p == L_('(')))
161 		{
162 		  int flags2 = ((flags & FNM_FILE_NAME)
163 				? flags : (flags & ~FNM_PERIOD));
164 		  bool no_leading_period2 = no_leading_period;
165 
166 		  for (--p; n < endp; ++n, no_leading_period2 = false)
167 		    if (FCT (p, n, string_end, no_leading_period2, flags2)
168 			== 0)
169 		      return 0;
170 		}
171 	      else if (c == L_('/') && (flags & FNM_FILE_NAME))
172 		{
173 		  while (n < string_end && *n != L_('/'))
174 		    ++n;
175 		  if (n < string_end && *n == L_('/')
176 		      && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
177 			  == 0))
178 		    return 0;
179 		}
180 	      else
181 		{
182 		  int flags2 = ((flags & FNM_FILE_NAME)
183 				? flags : (flags & ~FNM_PERIOD));
184 		  int no_leading_period2 = no_leading_period;
185 
186 		  if (c == L_('\\') && !(flags & FNM_NOESCAPE))
187 		    c = *p;
188 		  c = FOLD (c);
189 		  for (--p; n < endp; ++n, no_leading_period2 = false)
190 		    if (FOLD ((UCHAR) *n) == c
191 			&& (FCT (p, n, string_end, no_leading_period2, flags2)
192 			    == 0))
193 		      return 0;
194 		}
195 	    }
196 
197 	  /* If we come here no match is possible with the wildcard.  */
198 	  return FNM_NOMATCH;
199 
200 	case L_('['):
201 	  {
202 	    /* Nonzero if the sense of the character class is inverted.  */
203 	    register bool not;
204 	    CHAR cold;
205 	    UCHAR fn;
206 
207 	    if (posixly_correct == 0)
208 	      posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
209 
210 	    if (n == string_end)
211 	      return FNM_NOMATCH;
212 
213 	    if (*n == L_('.') && no_leading_period)
214 	      return FNM_NOMATCH;
215 
216 	    if (*n == L_('/') && (flags & FNM_FILE_NAME))
217 	      /* `/' cannot be matched.  */
218 	      return FNM_NOMATCH;
219 
220 	    not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
221 	    if (not)
222 	      ++p;
223 
224 	    fn = FOLD ((UCHAR) *n);
225 
226 	    c = *p++;
227 	    for (;;)
228 	      {
229 		if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
230 		  {
231 		    if (*p == L_('\0'))
232 		      return FNM_NOMATCH;
233 		    c = FOLD ((UCHAR) *p);
234 		    ++p;
235 
236 		    if (c == fn)
237 		      goto matched;
238 		  }
239 		else if (c == L_('[') && *p == L_(':'))
240 		  {
241 		    /* Leave room for the null.  */
242 		    CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
243 		    size_t c1 = 0;
244 #if defined _LIBC || WIDE_CHAR_SUPPORT
245 		    wctype_t wt;
246 #endif
247 		    const CHAR *startp = p;
248 
249 		    for (;;)
250 		      {
251 			if (c1 == CHAR_CLASS_MAX_LENGTH)
252 			  /* The name is too long and therefore the pattern
253 			     is ill-formed.  */
254 			  return FNM_NOMATCH;
255 
256 			c = *++p;
257 			if (c == L_(':') && p[1] == L_(']'))
258 			  {
259 			    p += 2;
260 			    break;
261 			  }
262 			if (c < L_('a') || c >= L_('z'))
263 			  {
264 			    /* This cannot possibly be a character class name.
265 			       Match it as a normal range.  */
266 			    p = startp;
267 			    c = L_('[');
268 			    goto normal_bracket;
269 			  }
270 			str[c1++] = c;
271 		      }
272 		    str[c1] = L_('\0');
273 
274 #if defined _LIBC || WIDE_CHAR_SUPPORT
275 		    wt = IS_CHAR_CLASS (str);
276 		    if (wt == 0)
277 		      /* Invalid character class name.  */
278 		      return FNM_NOMATCH;
279 
280 # if defined _LIBC && ! WIDE_CHAR_VERSION
281 		    /* The following code is glibc specific but does
282 		       there a good job in speeding up the code since
283 		       we can avoid the btowc() call.  */
284 		    if (_ISCTYPE ((UCHAR) *n, wt))
285 		      goto matched;
286 # else
287 		    if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
288 		      goto matched;
289 # endif
290 #else
291 		    if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
292 			|| (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
293 			|| (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
294 			|| (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
295 			|| (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
296 			|| (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
297 			|| (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
298 			|| (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
299 			|| (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
300 			|| (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
301 			|| (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
302 			|| (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
303 		      goto matched;
304 #endif
305 		    c = *p++;
306 		  }
307 #ifdef _LIBC
308 		else if (c == L_('[') && *p == L_('='))
309 		  {
310 		    UCHAR str[1];
311 		    uint32_t nrules =
312 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
313 		    const CHAR *startp = p;
314 
315 		    c = *++p;
316 		    if (c == L_('\0'))
317 		      {
318 			p = startp;
319 			c = L_('[');
320 			goto normal_bracket;
321 		      }
322 		    str[0] = c;
323 
324 		    c = *++p;
325 		    if (c != L_('=') || p[1] != L_(']'))
326 		      {
327 			p = startp;
328 			c = L_('[');
329 			goto normal_bracket;
330 		      }
331 		    p += 2;
332 
333 		    if (nrules == 0)
334 		      {
335 			if ((UCHAR) *n == str[0])
336 			  goto matched;
337 		      }
338 		    else
339 		      {
340 			const int32_t *table;
341 # if WIDE_CHAR_VERSION
342 			const int32_t *weights;
343 			const int32_t *extra;
344 # else
345 			const unsigned char *weights;
346 			const unsigned char *extra;
347 # endif
348 			const int32_t *indirect;
349 			int32_t idx;
350 			const UCHAR *cp = (const UCHAR *) str;
351 
352 			/* This #include defines a local function!  */
353 # if WIDE_CHAR_VERSION
354 #  include <locale/weightwc.h>
355 # else
356 #  include <locale/weight.h>
357 # endif
358 
359 # if WIDE_CHAR_VERSION
360 			table = (const int32_t *)
361 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
362 			weights = (const int32_t *)
363 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
364 			extra = (const int32_t *)
365 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
366 			indirect = (const int32_t *)
367 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
368 # else
369 			table = (const int32_t *)
370 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371 			weights = (const unsigned char *)
372 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
373 			extra = (const unsigned char *)
374 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
375 			indirect = (const int32_t *)
376 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
377 # endif
378 
379 			idx = findidx (&cp);
380 			if (idx != 0)
381 			  {
382 			    /* We found a table entry.  Now see whether the
383 			       character we are currently at has the same
384 			       equivalance class value.  */
385 			    int len = weights[idx];
386 			    int32_t idx2;
387 			    const UCHAR *np = (const UCHAR *) n;
388 
389 			    idx2 = findidx (&np);
390 			    if (idx2 != 0 && len == weights[idx2])
391 			      {
392 				int cnt = 0;
393 
394 				while (cnt < len
395 				       && (weights[idx + 1 + cnt]
396 					   == weights[idx2 + 1 + cnt]))
397 				  ++cnt;
398 
399 				if (cnt == len)
400 				  goto matched;
401 			      }
402 			  }
403 		      }
404 
405 		    c = *p++;
406 		  }
407 #endif
408 		else if (c == L_('\0'))
409 		  /* [ (unterminated) loses.  */
410 		  return FNM_NOMATCH;
411 		else
412 		  {
413 		    bool is_range = false;
414 
415 #ifdef _LIBC
416 		    bool is_seqval = false;
417 
418 		    if (c == L_('[') && *p == L_('.'))
419 		      {
420 			uint32_t nrules =
421 			  _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
422 			const CHAR *startp = p;
423 			size_t c1 = 0;
424 
425 			while (1)
426 			  {
427 			    c = *++p;
428 			    if (c == L_('.') && p[1] == L_(']'))
429 			      {
430 				p += 2;
431 				break;
432 			      }
433 			    if (c == '\0')
434 			      return FNM_NOMATCH;
435 			    ++c1;
436 			  }
437 
438 			/* We have to handling the symbols differently in
439 			   ranges since then the collation sequence is
440 			   important.  */
441 			is_range = *p == L_('-') && p[1] != L_('\0');
442 
443 			if (nrules == 0)
444 			  {
445 			    /* There are no names defined in the collation
446 			       data.  Therefore we only accept the trivial
447 			       names consisting of the character itself.  */
448 			    if (c1 != 1)
449 			      return FNM_NOMATCH;
450 
451 			    if (!is_range && *n == startp[1])
452 			      goto matched;
453 
454 			    cold = startp[1];
455 			    c = *p++;
456 			  }
457 			else
458 			  {
459 			    int32_t table_size;
460 			    const int32_t *symb_table;
461 # ifdef WIDE_CHAR_VERSION
462 			    char str[c1];
463 			    size_t strcnt;
464 # else
465 #  define str (startp + 1)
466 # endif
467 			    const unsigned char *extra;
468 			    int32_t idx;
469 			    int32_t elem;
470 			    int32_t second;
471 			    int32_t hash;
472 
473 # ifdef WIDE_CHAR_VERSION
474 			    /* We have to convert the name to a single-byte
475 			       string.  This is possible since the names
476 			       consist of ASCII characters and the internal
477 			       representation is UCS4.  */
478 			    for (strcnt = 0; strcnt < c1; ++strcnt)
479 			      str[strcnt] = startp[1 + strcnt];
480 # endif
481 
482 			    table_size =
483 			      _NL_CURRENT_WORD (LC_COLLATE,
484 						_NL_COLLATE_SYMB_HASH_SIZEMB);
485 			    symb_table = (const int32_t *)
486 			      _NL_CURRENT (LC_COLLATE,
487 					   _NL_COLLATE_SYMB_TABLEMB);
488 			    extra = (const unsigned char *)
489 			      _NL_CURRENT (LC_COLLATE,
490 					   _NL_COLLATE_SYMB_EXTRAMB);
491 
492 			    /* Locate the character in the hashing table.  */
493 			    hash = elem_hash (str, c1);
494 
495 			    idx = 0;
496 			    elem = hash % table_size;
497 			    second = hash % (table_size - 2);
498 			    while (symb_table[2 * elem] != 0)
499 			      {
500 				/* First compare the hashing value.  */
501 				if (symb_table[2 * elem] == hash
502 				    && c1 == extra[symb_table[2 * elem + 1]]
503 				    && memcmp (str,
504 					       &extra[symb_table[2 * elem + 1]
505 						     + 1], c1) == 0)
506 				  {
507 				    /* Yep, this is the entry.  */
508 				    idx = symb_table[2 * elem + 1];
509 				    idx += 1 + extra[idx];
510 				    break;
511 				  }
512 
513 				/* Next entry.  */
514 				elem += second;
515 			      }
516 
517 			    if (symb_table[2 * elem] != 0)
518 			      {
519 				/* Compare the byte sequence but only if
520 				   this is not part of a range.  */
521 # ifdef WIDE_CHAR_VERSION
522 				int32_t *wextra;
523 
524 				idx += 1 + extra[idx];
525 				/* Adjust for the alignment.  */
526 				idx = (idx + 3) & ~3;
527 
528 				wextra = (int32_t *) &extra[idx + 4];
529 # endif
530 
531 				if (! is_range)
532 				  {
533 # ifdef WIDE_CHAR_VERSION
534 				    for (c1 = 0;
535 					 (int32_t) c1 < wextra[idx];
536 					 ++c1)
537 				      if (n[c1] != wextra[1 + c1])
538 					break;
539 
540 				    if ((int32_t) c1 == wextra[idx])
541 				      goto matched;
542 # else
543 				    for (c1 = 0; c1 < extra[idx]; ++c1)
544 				      if (n[c1] != extra[1 + c1])
545 					break;
546 
547 				    if (c1 == extra[idx])
548 				      goto matched;
549 # endif
550 				  }
551 
552 				/* Get the collation sequence value.  */
553 				is_seqval = true;
554 # ifdef WIDE_CHAR_VERSION
555 				cold = wextra[1 + wextra[idx]];
556 # else
557 				/* Adjust for the alignment.  */
558 				idx += 1 + extra[idx];
559 				idx = (idx + 3) & ~4;
560 				cold = *((int32_t *) &extra[idx]);
561 # endif
562 
563 				c = *p++;
564 			      }
565 			    else if (c1 == 1)
566 			      {
567 				/* No valid character.  Match it as a
568 				   single byte.  */
569 				if (!is_range && *n == str[0])
570 				  goto matched;
571 
572 				cold = str[0];
573 				c = *p++;
574 			      }
575 			    else
576 			      return FNM_NOMATCH;
577 			  }
578 		      }
579 		    else
580 # undef str
581 #endif
582 		      {
583 			c = FOLD (c);
584 		      normal_bracket:
585 
586 			/* We have to handling the symbols differently in
587 			   ranges since then the collation sequence is
588 			   important.  */
589 			is_range = (*p == L_('-') && p[1] != L_('\0')
590 				    && p[1] != L_(']'));
591 
592 			if (!is_range && c == fn)
593 			  goto matched;
594 
595 			cold = c;
596 			c = *p++;
597 		      }
598 
599 		    if (c == L_('-') && *p != L_(']'))
600 		      {
601 #if _LIBC
602 			/* We have to find the collation sequence
603 			   value for C.  Collation sequence is nothing
604 			   we can regularly access.  The sequence
605 			   value is defined by the order in which the
606 			   definitions of the collation values for the
607 			   various characters appear in the source
608 			   file.  A strange concept, nowhere
609 			   documented.  */
610 			uint32_t fcollseq;
611 			uint32_t lcollseq;
612 			UCHAR cend = *p++;
613 
614 # ifdef WIDE_CHAR_VERSION
615 			/* Search in the `names' array for the characters.  */
616 			fcollseq = __collseq_table_lookup (collseq, fn);
617 			if (fcollseq == ~((uint32_t) 0))
618 			  /* XXX We don't know anything about the character
619 			     we are supposed to match.  This means we are
620 			     failing.  */
621 			  goto range_not_matched;
622 
623 			if (is_seqval)
624 			  lcollseq = cold;
625 			else
626 			  lcollseq = __collseq_table_lookup (collseq, cold);
627 # else
628 			fcollseq = collseq[fn];
629 			lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
630 # endif
631 
632 			is_seqval = false;
633 			if (cend == L_('[') && *p == L_('.'))
634 			  {
635 			    uint32_t nrules =
636 			      _NL_CURRENT_WORD (LC_COLLATE,
637 						_NL_COLLATE_NRULES);
638 			    const CHAR *startp = p;
639 			    size_t c1 = 0;
640 
641 			    while (1)
642 			      {
643 				c = *++p;
644 				if (c == L_('.') && p[1] == L_(']'))
645 				  {
646 				    p += 2;
647 				    break;
648 				  }
649 				if (c == '\0')
650 				  return FNM_NOMATCH;
651 				++c1;
652 			      }
653 
654 			    if (nrules == 0)
655 			      {
656 				/* There are no names defined in the
657 				   collation data.  Therefore we only
658 				   accept the trivial names consisting
659 				   of the character itself.  */
660 				if (c1 != 1)
661 				  return FNM_NOMATCH;
662 
663 				cend = startp[1];
664 			      }
665 			    else
666 			      {
667 				int32_t table_size;
668 				const int32_t *symb_table;
669 # ifdef WIDE_CHAR_VERSION
670 				char str[c1];
671 				size_t strcnt;
672 # else
673 #  define str (startp + 1)
674 # endif
675 				const unsigned char *extra;
676 				int32_t idx;
677 				int32_t elem;
678 				int32_t second;
679 				int32_t hash;
680 
681 # ifdef WIDE_CHAR_VERSION
682 				/* We have to convert the name to a single-byte
683 				   string.  This is possible since the names
684 				   consist of ASCII characters and the internal
685 				   representation is UCS4.  */
686 				for (strcnt = 0; strcnt < c1; ++strcnt)
687 				  str[strcnt] = startp[1 + strcnt];
688 # endif
689 
690 				table_size =
691 				  _NL_CURRENT_WORD (LC_COLLATE,
692 						    _NL_COLLATE_SYMB_HASH_SIZEMB);
693 				symb_table = (const int32_t *)
694 				  _NL_CURRENT (LC_COLLATE,
695 					       _NL_COLLATE_SYMB_TABLEMB);
696 				extra = (const unsigned char *)
697 				  _NL_CURRENT (LC_COLLATE,
698 					       _NL_COLLATE_SYMB_EXTRAMB);
699 
700 				/* Locate the character in the hashing
701                                    table.  */
702 				hash = elem_hash (str, c1);
703 
704 				idx = 0;
705 				elem = hash % table_size;
706 				second = hash % (table_size - 2);
707 				while (symb_table[2 * elem] != 0)
708 				  {
709 				/* First compare the hashing value.  */
710 				    if (symb_table[2 * elem] == hash
711 					&& (c1
712 					    == extra[symb_table[2 * elem + 1]])
713 					&& memcmp (str,
714 						   &extra[symb_table[2 * elem + 1]
715 							 + 1], c1) == 0)
716 				      {
717 					/* Yep, this is the entry.  */
718 					idx = symb_table[2 * elem + 1];
719 					idx += 1 + extra[idx];
720 					break;
721 				      }
722 
723 				    /* Next entry.  */
724 				    elem += second;
725 				  }
726 
727 				if (symb_table[2 * elem] != 0)
728 				  {
729 				    /* Compare the byte sequence but only if
730 				       this is not part of a range.  */
731 # ifdef WIDE_CHAR_VERSION
732 				    int32_t *wextra;
733 
734 				    idx += 1 + extra[idx];
735 				    /* Adjust for the alignment.  */
736 				    idx = (idx + 3) & ~4;
737 
738 				    wextra = (int32_t *) &extra[idx + 4];
739 # endif
740 				    /* Get the collation sequence value.  */
741 				    is_seqval = true;
742 # ifdef WIDE_CHAR_VERSION
743 				    cend = wextra[1 + wextra[idx]];
744 # else
745 				    /* Adjust for the alignment.  */
746 				    idx += 1 + extra[idx];
747 				    idx = (idx + 3) & ~4;
748 				    cend = *((int32_t *) &extra[idx]);
749 # endif
750 				  }
751 				else if (symb_table[2 * elem] != 0 && c1 == 1)
752 				  {
753 				    cend = str[0];
754 				    c = *p++;
755 				  }
756 				else
757 				  return FNM_NOMATCH;
758 			      }
759 # undef str
760 			  }
761 			else
762 			  {
763 			    if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
764 			      cend = *p++;
765 			    if (cend == L_('\0'))
766 			      return FNM_NOMATCH;
767 			    cend = FOLD (cend);
768 			  }
769 
770 			/* XXX It is not entirely clear to me how to handle
771 			   characters which are not mentioned in the
772 			   collation specification.  */
773 			if (
774 # ifdef WIDE_CHAR_VERSION
775 			    lcollseq == 0xffffffff ||
776 # endif
777 			    lcollseq <= fcollseq)
778 			  {
779 			    /* We have to look at the upper bound.  */
780 			    uint32_t hcollseq;
781 
782 			    if (is_seqval)
783 			      hcollseq = cend;
784 			    else
785 			      {
786 # ifdef WIDE_CHAR_VERSION
787 				hcollseq =
788 				  __collseq_table_lookup (collseq, cend);
789 				if (hcollseq == ~((uint32_t) 0))
790 				  {
791 				    /* Hum, no information about the upper
792 				       bound.  The matching succeeds if the
793 				       lower bound is matched exactly.  */
794 				    if (lcollseq != fcollseq)
795 				      goto range_not_matched;
796 
797 				    goto matched;
798 				  }
799 # else
800 				hcollseq = collseq[cend];
801 # endif
802 			      }
803 
804 			    if (lcollseq <= hcollseq && fcollseq <= hcollseq)
805 			      goto matched;
806 			  }
807 # ifdef WIDE_CHAR_VERSION
808 		      range_not_matched:
809 # endif
810 #else
811 			/* We use a boring value comparison of the character
812 			   values.  This is better than comparing using
813 			   `strcoll' since the latter would have surprising
814 			   and sometimes fatal consequences.  */
815 			UCHAR cend = *p++;
816 
817 			if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
818 			  cend = *p++;
819 			if (cend == L_('\0'))
820 			  return FNM_NOMATCH;
821 
822 			/* It is a range.  */
823 			if (cold <= fn && fn <= cend)
824 			  goto matched;
825 #endif
826 
827 			c = *p++;
828 		      }
829 		  }
830 
831 		if (c == L_(']'))
832 		  break;
833 	      }
834 
835 	    if (!not)
836 	      return FNM_NOMATCH;
837 	    break;
838 
839 	  matched:
840 	    /* Skip the rest of the [...] that already matched.  */
841 	    do
842 	      {
843 	      ignore_next:
844 		c = *p++;
845 
846 		if (c == L_('\0'))
847 		  /* [... (unterminated) loses.  */
848 		  return FNM_NOMATCH;
849 
850 		if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
851 		  {
852 		    if (*p == L_('\0'))
853 		      return FNM_NOMATCH;
854 		    /* XXX 1003.2d11 is unclear if this is right.  */
855 		    ++p;
856 		  }
857 		else if (c == L_('[') && *p == L_(':'))
858 		  {
859 		    int c1 = 0;
860 		    const CHAR *startp = p;
861 
862 		    while (1)
863 		      {
864 			c = *++p;
865 			if (++c1 == CHAR_CLASS_MAX_LENGTH)
866 			  return FNM_NOMATCH;
867 
868 			if (*p == L_(':') && p[1] == L_(']'))
869 			  break;
870 
871 			if (c < L_('a') || c >= L_('z'))
872 			  {
873 			    p = startp;
874 			    goto ignore_next;
875 			  }
876 		      }
877 		    p += 2;
878 		    c = *p++;
879 		  }
880 		else if (c == L_('[') && *p == L_('='))
881 		  {
882 		    c = *++p;
883 		    if (c == L_('\0'))
884 		      return FNM_NOMATCH;
885 		    c = *++p;
886 		    if (c != L_('=') || p[1] != L_(']'))
887 		      return FNM_NOMATCH;
888 		    p += 2;
889 		    c = *p++;
890 		  }
891 		else if (c == L_('[') && *p == L_('.'))
892 		  {
893 		    ++p;
894 		    while (1)
895 		      {
896 			c = *++p;
897 			if (c == '\0')
898 			  return FNM_NOMATCH;
899 
900 			if (*p == L_('.') && p[1] == L_(']'))
901 			  break;
902 		      }
903 		    p += 2;
904 		    c = *p++;
905 		  }
906 	      }
907 	    while (c != L_(']'));
908 	    if (not)
909 	      return FNM_NOMATCH;
910 	  }
911 	  break;
912 
913 	case L_('+'):
914 	case L_('@'):
915 	case L_('!'):
916 	  if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
917 	    {
918 	      int res;
919 
920 	      res = EXT (c, p, n, string_end, no_leading_period, flags);
921 	      if (res != -1)
922 		return res;
923 	    }
924 	  goto normal_match;
925 
926 	case L_('/'):
927 	  if (NO_LEADING_PERIOD (flags))
928 	    {
929 	      if (n == string_end || c != (UCHAR) *n)
930 		return FNM_NOMATCH;
931 
932 	      new_no_leading_period = true;
933 	      break;
934 	    }
935 	  /* FALLTHROUGH */
936 	default:
937 	normal_match:
938 	  if (n == string_end || c != FOLD ((UCHAR) *n))
939 	    return FNM_NOMATCH;
940 	}
941 
942       no_leading_period = new_no_leading_period;
943       ++n;
944     }
945 
946   if (n == string_end)
947     return 0;
948 
949   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
950     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
951     return 0;
952 
953   return FNM_NOMATCH;
954 }
955 
956 
957 static const CHAR *
958 internal_function
END(const CHAR * pattern)959 END (const CHAR *pattern)
960 {
961   const CHAR *p = pattern;
962 
963   while (1)
964     if (*++p == L_('\0'))
965       /* This is an invalid pattern.  */
966       return pattern;
967     else if (*p == L_('['))
968       {
969 	/* Handle brackets special.  */
970 	if (posixly_correct == 0)
971 	  posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
972 
973 	/* Skip the not sign.  We have to recognize it because of a possibly
974 	   following ']'.  */
975 	if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
976 	  ++p;
977 	/* A leading ']' is recognized as such.  */
978 	if (*p == L_(']'))
979 	  ++p;
980 	/* Skip over all characters of the list.  */
981 	while (*p != L_(']'))
982 	  if (*p++ == L_('\0'))
983 	    /* This is no valid pattern.  */
984 	    return pattern;
985       }
986     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
987 	      || *p == L_('!')) && p[1] == L_('('))
988       p = END (p + 1);
989     else if (*p == L_(')'))
990       break;
991 
992   return p + 1;
993 }
994 
995 
996 static int
997 internal_function
EXT(INT opt,const CHAR * pattern,const CHAR * string,const CHAR * string_end,bool no_leading_period,int flags)998 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
999      bool no_leading_period, int flags)
1000 {
1001   const CHAR *startp;
1002   size_t level;
1003   struct patternlist
1004   {
1005     struct patternlist *next;
1006     int malloced;
1007     CHAR str[1];
1008   } *list = NULL;
1009   struct patternlist **lastp = &list;
1010   size_t pattern_len = STRLEN (pattern);
1011   const CHAR *p;
1012   const CHAR *rs;
1013 #if HAVE_ALLOCA || defined _LIBC
1014   enum { ALLOCA_LIMIT = 8000 };
1015 #else
1016   enum { ALLOCA_LIMIT = 0 };
1017 #endif
1018   int retval;
1019 
1020   /* Parse the pattern.  Store the individual parts in the list.  */
1021   level = 0;
1022   for (startp = p = pattern + 1; ; ++p)
1023     if (*p == L_('\0'))
1024       /* This is an invalid pattern.  */
1025       goto failed;
1026     else if (*p == L_('['))
1027       {
1028 	/* Handle brackets special.  */
1029 	if (posixly_correct == 0)
1030 	  posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1031 
1032 	/* Skip the not sign.  We have to recognize it because of a possibly
1033 	   following ']'.  */
1034 	if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1035 	  ++p;
1036 	/* A leading ']' is recognized as such.  */
1037 	if (*p == L_(']'))
1038 	  ++p;
1039 	/* Skip over all characters of the list.  */
1040 	while (*p != L_(']'))
1041 	  if (*p++ == L_('\0'))
1042 	    /* This is no valid pattern.  */
1043 	    goto failed;
1044       }
1045     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1046 	      || *p == L_('!')) && p[1] == L_('('))
1047       /* Remember the nesting level.  */
1048       ++level;
1049     else if (*p == L_(')'))
1050       {
1051 	if (level-- == 0)
1052 	  {
1053 	    /* This means we found the end of the pattern.  */
1054 #define NEW_PATTERN \
1055 	    struct patternlist *newp;					      \
1056 	    size_t plen;						      \
1057 	    size_t plensize;						      \
1058 	    size_t newpsize;						      \
1059 									      \
1060 	    plen = (opt == L_('?') || opt == L_('@')			      \
1061 		    ? pattern_len					      \
1062 		    : p - startp + 1);					      \
1063 	    plensize = plen * sizeof (CHAR);				      \
1064 	    newpsize = offsetof (struct patternlist, str) + plensize;	      \
1065 	    if ((size_t) -1 / sizeof (CHAR) < plen			      \
1066 		|| newpsize < offsetof (struct patternlist, str))	      \
1067 	      goto failed;						      \
1068 	    if (newpsize < ALLOCA_LIMIT)				      \
1069 	      {								      \
1070 		newp = (struct patternlist *) alloca (newpsize);	      \
1071 		newp->malloced = 0;					      \
1072 	      }								      \
1073 	    else							      \
1074 	      {								      \
1075 		newp = (struct patternlist *) malloc (newpsize);	      \
1076 		if (!newp)						      \
1077 		  goto failed;						      \
1078 		newp->malloced = 1;					      \
1079 	      }								      \
1080 	    *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0');   \
1081 	    newp->next = NULL;						      \
1082 	    *lastp = newp;						      \
1083 	    lastp = &newp->next
1084 	    NEW_PATTERN;
1085 	    break;
1086 	  }
1087       }
1088     else if (*p == L_('|'))
1089       {
1090 	if (level == 0)
1091 	  {
1092 	    NEW_PATTERN;
1093 	    startp = p + 1;
1094 	  }
1095       }
1096   assert (list != NULL);
1097   assert (p[-1] == L_(')'));
1098 #undef NEW_PATTERN
1099 
1100   switch (opt)
1101     {
1102     case L_('*'):
1103       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1104 	{
1105 	  retval = 0;
1106 	  goto done;
1107 	}
1108       /* FALLTHROUGH */
1109 
1110     case L_('+'):
1111       do
1112 	{
1113 	  struct patternlist *next;
1114 
1115 	  for (rs = string; rs <= string_end; ++rs)
1116 	    /* First match the prefix with the current pattern with the
1117 	       current pattern.  */
1118 	    if (FCT (list->str, string, rs, no_leading_period,
1119 		     flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1120 		/* This was successful.  Now match the rest with the rest
1121 		   of the pattern.  */
1122 		&& (FCT (p, rs, string_end,
1123 			 rs == string
1124 			 ? no_leading_period
1125 			 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1126 			 flags & FNM_FILE_NAME
1127 			 ? flags : flags & ~FNM_PERIOD) == 0
1128 		    /* This didn't work.  Try the whole pattern.  */
1129 		    || (rs != string
1130 			&& FCT (pattern - 1, rs, string_end,
1131 				rs == string
1132 				? no_leading_period
1133 				: rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1134 				flags & FNM_FILE_NAME
1135 				? flags : flags & ~FNM_PERIOD) == 0)))
1136 	      {
1137 		/* It worked.  Signal success.  */
1138 		retval = 0;
1139 		goto done;
1140 	      }
1141 
1142 	  next = list->next;
1143 	  if (list->malloced)
1144 	    free (list);
1145 	  list = next;
1146 	}
1147       while (list != NULL);
1148 
1149       /* None of the patterns lead to a match.  */
1150       return FNM_NOMATCH;
1151 
1152     case L_('?'):
1153       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1154 	{
1155 	  retval = 0;
1156 	  goto done;
1157 	}
1158       /* FALLTHROUGH */
1159 
1160     case L_('@'):
1161       do
1162 	{
1163 	  struct patternlist *next;
1164 
1165 	  /* I cannot believe it but `strcat' is actually acceptable
1166 	     here.  Match the entire string with the prefix from the
1167 	     pattern list and the rest of the pattern following the
1168 	     pattern list.  */
1169 	  if (FCT (STRCAT (list->str, p), string, string_end,
1170 		   no_leading_period,
1171 		   flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1172 	    {
1173 	      /* It worked.  Signal success.  */
1174 	      retval = 0;
1175 	      goto done;
1176 	    }
1177 
1178 	  next = list->next;
1179 	  if (list->malloced)
1180 	    free (list);
1181 	  list = next;
1182 	}
1183       while (list != NULL);
1184 
1185       /* None of the patterns lead to a match.  */
1186       return FNM_NOMATCH;
1187 
1188     case L_('!'):
1189       for (rs = string; rs <= string_end; ++rs)
1190 	{
1191 	  struct patternlist *runp;
1192 
1193 	  for (runp = list; runp != NULL; runp = runp->next)
1194 	    if (FCT (runp->str, string, rs,  no_leading_period,
1195 		     flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1196 	      break;
1197 
1198 	  /* If none of the patterns matched see whether the rest does.  */
1199 	  if (runp == NULL
1200 	      && (FCT (p, rs, string_end,
1201 		       rs == string
1202 		       ? no_leading_period
1203 		       : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1204 		       flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1205 		  == 0))
1206 	    {
1207 	      /* This is successful.  */
1208 	      retval = 0;
1209 	      goto done;
1210 	    }
1211 	}
1212 
1213       /* None of the patterns together with the rest of the pattern
1214 	 lead to a match.  */
1215       retval = FNM_NOMATCH;
1216       goto done;
1217 
1218     default:
1219       assert (! "Invalid extended matching operator");
1220       break;
1221     }
1222 
1223  failed:
1224   retval = -1;
1225  done:
1226   while (list != NULL)
1227     {
1228       struct patternlist *next = list->next;
1229 
1230       if (list->malloced)
1231 	free (list);
1232       list = next;
1233     }
1234   return retval;
1235 }
1236 
1237 
1238 #undef FOLD
1239 #undef CHAR
1240 #undef UCHAR
1241 #undef INT
1242 #undef FCT
1243 #undef EXT
1244 #undef END
1245 #undef MEMPCPY
1246 #undef MEMCHR
1247 #undef STRCOLL
1248 #undef STRLEN
1249 #undef STRCAT
1250 #undef L_
1251 #undef BTOWC
1252