xref: /netbsd-src/external/gpl2/xcvs/dist/lib/fnmatch_loop.c (revision 5a6c14c844c4c665da5632061aebde7bb2cb5766)
1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005
2 	Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 2, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software Foundation,
16    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
17 #include <sys/cdefs.h>
18 __RCSID("$NetBSD: fnmatch_loop.c,v 1.2 2016/05/17 14:00:09 christos Exp $");
19 
20 
21 /* Match STRING against the file name pattern PATTERN, returning zero if
22    it matches, nonzero if not.  */
23 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
24 		const CHAR *string_end, bool no_leading_period, int flags)
25      internal_function;
26 static const CHAR *END (const CHAR *patternp) internal_function;
27 
28 static int
29 internal_function
FCT(const CHAR * pattern,const CHAR * string,const CHAR * string_end,bool no_leading_period,int flags)30 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
31      bool no_leading_period, int flags)
32 {
33   register const CHAR *p = pattern, *n = string;
34   register UCHAR c;
35 #ifdef _LIBC
36 # if WIDE_CHAR_VERSION
37   const char *collseq = (const char *)
38     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
39 # else
40   const UCHAR *collseq = (const UCHAR *)
41     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
42 # endif
43 #endif
44 
45   while ((c = *p++) != L('\0'))
46     {
47       bool new_no_leading_period = false;
48       c = FOLD (c);
49 
50       switch (c)
51 	{
52 	case L('?'):
53 	  if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
54 	    {
55 	      int res;
56 
57 	      res = EXT (c, p, n, string_end, no_leading_period,
58 			 flags);
59 	      if (res != -1)
60 		return res;
61 	    }
62 
63 	  if (n == string_end)
64 	    return FNM_NOMATCH;
65 	  else if (*n == L('/') && (flags & FNM_FILE_NAME))
66 	    return FNM_NOMATCH;
67 	  else if (*n == L('.') && no_leading_period)
68 	    return FNM_NOMATCH;
69 	  break;
70 
71 	case L('\\'):
72 	  if (!(flags & FNM_NOESCAPE))
73 	    {
74 	      c = *p++;
75 	      if (c == L('\0'))
76 		/* Trailing \ loses.  */
77 		return FNM_NOMATCH;
78 	      c = FOLD (c);
79 	    }
80 	  if (n == string_end || FOLD ((UCHAR) *n) != c)
81 	    return FNM_NOMATCH;
82 	  break;
83 
84 	case L('*'):
85 	  if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
86 	    {
87 	      int res;
88 
89 	      res = EXT (c, p, n, string_end, no_leading_period,
90 			 flags);
91 	      if (res != -1)
92 		return res;
93 	    }
94 
95 	  if (n != string_end && *n == L('.') && no_leading_period)
96 	    return FNM_NOMATCH;
97 
98 	  for (c = *p++; c == L('?') || c == L('*'); c = *p++)
99 	    {
100 	      if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
101 		{
102 		  const CHAR *endp = END (p);
103 		  if (endp != p)
104 		    {
105 		      /* This is a pattern.  Skip over it.  */
106 		      p = endp;
107 		      continue;
108 		    }
109 		}
110 
111 	      if (c == L('?'))
112 		{
113 		  /* A ? needs to match one character.  */
114 		  if (n == string_end)
115 		    /* There isn't another character; no match.  */
116 		    return FNM_NOMATCH;
117 		  else if (*n == L('/')
118 			   && __builtin_expect (flags & FNM_FILE_NAME, 0))
119 		    /* A slash does not match a wildcard under
120 		       FNM_FILE_NAME.  */
121 		    return FNM_NOMATCH;
122 		  else
123 		    /* One character of the string is consumed in matching
124 		       this ? wildcard, so *??? won't match if there are
125 		       less than three characters.  */
126 		    ++n;
127 		}
128 	    }
129 
130 	  if (c == L('\0'))
131 	    /* The wildcard(s) is/are the last element of the pattern.
132 	       If the name is a file name and contains another slash
133 	       this means it cannot match, unless the FNM_LEADING_DIR
134 	       flag is set.  */
135 	    {
136 	      int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
137 
138 	      if (flags & FNM_FILE_NAME)
139 		{
140 		  if (flags & FNM_LEADING_DIR)
141 		    result = 0;
142 		  else
143 		    {
144 		      if (MEMCHR (n, L('/'), string_end - n) == NULL)
145 			result = 0;
146 		    }
147 		}
148 
149 	      return result;
150 	    }
151 	  else
152 	    {
153 	      const CHAR *endp;
154 
155 	      endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
156 			     string_end - n);
157 	      if (endp == NULL)
158 		endp = string_end;
159 
160 	      if (c == L('[')
161 		  || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
162 		      && (c == L('@') || c == L('+') || c == L('!'))
163 		      && *p == L('(')))
164 		{
165 		  int flags2 = ((flags & FNM_FILE_NAME)
166 				? flags : (flags & ~FNM_PERIOD));
167 		  bool no_leading_period2 = no_leading_period;
168 
169 		  for (--p; n < endp; ++n, no_leading_period2 = false)
170 		    if (FCT (p, n, string_end, no_leading_period2, flags2)
171 			== 0)
172 		      return 0;
173 		}
174 	      else if (c == L('/') && (flags & FNM_FILE_NAME))
175 		{
176 		  while (n < string_end && *n != L('/'))
177 		    ++n;
178 		  if (n < string_end && *n == L('/')
179 		      && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
180 			  == 0))
181 		    return 0;
182 		}
183 	      else
184 		{
185 		  int flags2 = ((flags & FNM_FILE_NAME)
186 				? flags : (flags & ~FNM_PERIOD));
187 		  int no_leading_period2 = no_leading_period;
188 
189 		  if (c == L('\\') && !(flags & FNM_NOESCAPE))
190 		    c = *p;
191 		  c = FOLD (c);
192 		  for (--p; n < endp; ++n, no_leading_period2 = false)
193 		    if (FOLD ((UCHAR) *n) == c
194 			&& (FCT (p, n, string_end, no_leading_period2, flags2)
195 			    == 0))
196 		      return 0;
197 		}
198 	    }
199 
200 	  /* If we come here no match is possible with the wildcard.  */
201 	  return FNM_NOMATCH;
202 
203 	case L('['):
204 	  {
205 	    /* Nonzero if the sense of the character class is inverted.  */
206 	    register bool not;
207 	    CHAR cold;
208 	    UCHAR fn;
209 
210 	    if (posixly_correct == 0)
211 	      posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
212 
213 	    if (n == string_end)
214 	      return FNM_NOMATCH;
215 
216 	    if (*n == L('.') && no_leading_period)
217 	      return FNM_NOMATCH;
218 
219 	    if (*n == L('/') && (flags & FNM_FILE_NAME))
220 	      /* `/' cannot be matched.  */
221 	      return FNM_NOMATCH;
222 
223 	    not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
224 	    if (not)
225 	      ++p;
226 
227 	    fn = FOLD ((UCHAR) *n);
228 
229 	    c = *p++;
230 	    for (;;)
231 	      {
232 		if (!(flags & FNM_NOESCAPE) && c == L('\\'))
233 		  {
234 		    if (*p == L('\0'))
235 		      return FNM_NOMATCH;
236 		    c = FOLD ((UCHAR) *p);
237 		    ++p;
238 
239 		    if (c == fn)
240 		      goto matched;
241 		  }
242 		else if (c == L('[') && *p == L(':'))
243 		  {
244 		    /* Leave room for the null.  */
245 		    CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
246 		    size_t c1 = 0;
247 #if defined _LIBC || WIDE_CHAR_SUPPORT
248 		    wctype_t wt;
249 #endif
250 		    const CHAR *startp = p;
251 
252 		    for (;;)
253 		      {
254 			if (c1 == CHAR_CLASS_MAX_LENGTH)
255 			  /* The name is too long and therefore the pattern
256 			     is ill-formed.  */
257 			  return FNM_NOMATCH;
258 
259 			c = *++p;
260 			if (c == L(':') && p[1] == L(']'))
261 			  {
262 			    p += 2;
263 			    break;
264 			  }
265 			if (c < L('a') || c >= L('z'))
266 			  {
267 			    /* This cannot possibly be a character class name.
268 			       Match it as a normal range.  */
269 			    p = startp;
270 			    c = L('[');
271 			    goto normal_bracket;
272 			  }
273 			str[c1++] = c;
274 		      }
275 		    str[c1] = L('\0');
276 
277 #if defined _LIBC || WIDE_CHAR_SUPPORT
278 		    wt = IS_CHAR_CLASS (str);
279 		    if (wt == 0)
280 		      /* Invalid character class name.  */
281 		      return FNM_NOMATCH;
282 
283 # if defined _LIBC && ! WIDE_CHAR_VERSION
284 		    /* The following code is glibc specific but does
285 		       there a good job in speeding up the code since
286 		       we can avoid the btowc() call.  */
287 		    if (_ISCTYPE ((UCHAR) *n, wt))
288 		      goto matched;
289 # else
290 		    if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
291 		      goto matched;
292 # endif
293 #else
294 		    if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
295 			|| (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
296 			|| (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
297 			|| (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
298 			|| (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
299 			|| (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
300 			|| (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
301 			|| (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
302 			|| (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
303 			|| (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
304 			|| (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
305 			|| (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
306 		      goto matched;
307 #endif
308 		    c = *p++;
309 		  }
310 #ifdef _LIBC
311 		else if (c == L('[') && *p == L('='))
312 		  {
313 		    UCHAR str[1];
314 		    uint32_t nrules =
315 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
316 		    const CHAR *startp = p;
317 
318 		    c = *++p;
319 		    if (c == L('\0'))
320 		      {
321 			p = startp;
322 			c = L('[');
323 			goto normal_bracket;
324 		      }
325 		    str[0] = c;
326 
327 		    c = *++p;
328 		    if (c != L('=') || p[1] != L(']'))
329 		      {
330 			p = startp;
331 			c = L('[');
332 			goto normal_bracket;
333 		      }
334 		    p += 2;
335 
336 		    if (nrules == 0)
337 		      {
338 			if ((UCHAR) *n == str[0])
339 			  goto matched;
340 		      }
341 		    else
342 		      {
343 			const int32_t *table;
344 # if WIDE_CHAR_VERSION
345 			const int32_t *weights;
346 			const int32_t *extra;
347 # else
348 			const unsigned char *weights;
349 			const unsigned char *extra;
350 # endif
351 			const int32_t *indirect;
352 			int32_t idx;
353 			const UCHAR *cp = (const UCHAR *) str;
354 
355 			/* This #include defines a local function!  */
356 # if WIDE_CHAR_VERSION
357 #  include <locale/weightwc.h>
358 # else
359 #  include <locale/weight.h>
360 # endif
361 
362 # if WIDE_CHAR_VERSION
363 			table = (const int32_t *)
364 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
365 			weights = (const int32_t *)
366 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
367 			extra = (const int32_t *)
368 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
369 			indirect = (const int32_t *)
370 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
371 # else
372 			table = (const int32_t *)
373 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
374 			weights = (const unsigned char *)
375 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
376 			extra = (const unsigned char *)
377 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
378 			indirect = (const int32_t *)
379 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
380 # endif
381 
382 			idx = findidx (&cp);
383 			if (idx != 0)
384 			  {
385 			    /* We found a table entry.  Now see whether the
386 			       character we are currently at has the same
387 			       equivalance class value.  */
388 			    int len = weights[idx];
389 			    int32_t idx2;
390 			    const UCHAR *np = (const UCHAR *) n;
391 
392 			    idx2 = findidx (&np);
393 			    if (idx2 != 0 && len == weights[idx2])
394 			      {
395 				int cnt = 0;
396 
397 				while (cnt < len
398 				       && (weights[idx + 1 + cnt]
399 					   == weights[idx2 + 1 + cnt]))
400 				  ++cnt;
401 
402 				if (cnt == len)
403 				  goto matched;
404 			      }
405 			  }
406 		      }
407 
408 		    c = *p++;
409 		  }
410 #endif
411 		else if (c == L('\0'))
412 		  /* [ (unterminated) loses.  */
413 		  return FNM_NOMATCH;
414 		else
415 		  {
416 		    bool is_range = false;
417 
418 #ifdef _LIBC
419 		    bool is_seqval = false;
420 
421 		    if (c == L('[') && *p == L('.'))
422 		      {
423 			uint32_t nrules =
424 			  _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
425 			const CHAR *startp = p;
426 			size_t c1 = 0;
427 
428 			while (1)
429 			  {
430 			    c = *++p;
431 			    if (c == L('.') && p[1] == L(']'))
432 			      {
433 				p += 2;
434 				break;
435 			      }
436 			    if (c == '\0')
437 			      return FNM_NOMATCH;
438 			    ++c1;
439 			  }
440 
441 			/* We have to handling the symbols differently in
442 			   ranges since then the collation sequence is
443 			   important.  */
444 			is_range = *p == L('-') && p[1] != L('\0');
445 
446 			if (nrules == 0)
447 			  {
448 			    /* There are no names defined in the collation
449 			       data.  Therefore we only accept the trivial
450 			       names consisting of the character itself.  */
451 			    if (c1 != 1)
452 			      return FNM_NOMATCH;
453 
454 			    if (!is_range && *n == startp[1])
455 			      goto matched;
456 
457 			    cold = startp[1];
458 			    c = *p++;
459 			  }
460 			else
461 			  {
462 			    int32_t table_size;
463 			    const int32_t *symb_table;
464 # ifdef WIDE_CHAR_VERSION
465 			    char str[c1];
466 			    size_t strcnt;
467 # else
468 #  define str (startp + 1)
469 # endif
470 			    const unsigned char *extra;
471 			    int32_t idx;
472 			    int32_t elem;
473 			    int32_t second;
474 			    int32_t hash;
475 
476 # ifdef WIDE_CHAR_VERSION
477 			    /* We have to convert the name to a single-byte
478 			       string.  This is possible since the names
479 			       consist of ASCII characters and the internal
480 			       representation is UCS4.  */
481 			    for (strcnt = 0; strcnt < c1; ++strcnt)
482 			      str[strcnt] = startp[1 + strcnt];
483 # endif
484 
485 			    table_size =
486 			      _NL_CURRENT_WORD (LC_COLLATE,
487 						_NL_COLLATE_SYMB_HASH_SIZEMB);
488 			    symb_table = (const int32_t *)
489 			      _NL_CURRENT (LC_COLLATE,
490 					   _NL_COLLATE_SYMB_TABLEMB);
491 			    extra = (const unsigned char *)
492 			      _NL_CURRENT (LC_COLLATE,
493 					   _NL_COLLATE_SYMB_EXTRAMB);
494 
495 			    /* Locate the character in the hashing table.  */
496 			    hash = elem_hash (str, c1);
497 
498 			    idx = 0;
499 			    elem = hash % table_size;
500 			    second = hash % (table_size - 2);
501 			    while (symb_table[2 * elem] != 0)
502 			      {
503 				/* First compare the hashing value.  */
504 				if (symb_table[2 * elem] == hash
505 				    && c1 == extra[symb_table[2 * elem + 1]]
506 				    && memcmp (str,
507 					       &extra[symb_table[2 * elem + 1]
508 						     + 1], c1) == 0)
509 				  {
510 				    /* Yep, this is the entry.  */
511 				    idx = symb_table[2 * elem + 1];
512 				    idx += 1 + extra[idx];
513 				    break;
514 				  }
515 
516 				/* Next entry.  */
517 				elem += second;
518 			      }
519 
520 			    if (symb_table[2 * elem] != 0)
521 			      {
522 				/* Compare the byte sequence but only if
523 				   this is not part of a range.  */
524 # ifdef WIDE_CHAR_VERSION
525 				int32_t *wextra;
526 
527 				idx += 1 + extra[idx];
528 				/* Adjust for the alignment.  */
529 				idx = (idx + 3) & ~3;
530 
531 				wextra = (int32_t *) &extra[idx + 4];
532 # endif
533 
534 				if (! is_range)
535 				  {
536 # ifdef WIDE_CHAR_VERSION
537 				    for (c1 = 0;
538 					 (int32_t) c1 < wextra[idx];
539 					 ++c1)
540 				      if (n[c1] != wextra[1 + c1])
541 					break;
542 
543 				    if ((int32_t) c1 == wextra[idx])
544 				      goto matched;
545 # else
546 				    for (c1 = 0; c1 < extra[idx]; ++c1)
547 				      if (n[c1] != extra[1 + c1])
548 					break;
549 
550 				    if (c1 == extra[idx])
551 				      goto matched;
552 # endif
553 				  }
554 
555 				/* Get the collation sequence value.  */
556 				is_seqval = true;
557 # ifdef WIDE_CHAR_VERSION
558 				cold = wextra[1 + wextra[idx]];
559 # else
560 				/* Adjust for the alignment.  */
561 				idx += 1 + extra[idx];
562 				idx = (idx + 3) & ~4;
563 				cold = *((int32_t *) &extra[idx]);
564 # endif
565 
566 				c = *p++;
567 			      }
568 			    else if (c1 == 1)
569 			      {
570 				/* No valid character.  Match it as a
571 				   single byte.  */
572 				if (!is_range && *n == str[0])
573 				  goto matched;
574 
575 				cold = str[0];
576 				c = *p++;
577 			      }
578 			    else
579 			      return FNM_NOMATCH;
580 			  }
581 		      }
582 		    else
583 # undef str
584 #endif
585 		      {
586 			c = FOLD (c);
587 		      normal_bracket:
588 
589 			/* We have to handling the symbols differently in
590 			   ranges since then the collation sequence is
591 			   important.  */
592 			is_range = (*p == L('-') && p[1] != L('\0')
593 				    && p[1] != L(']'));
594 
595 			if (!is_range && c == fn)
596 			  goto matched;
597 
598 			cold = c;
599 			c = *p++;
600 		      }
601 
602 		    if (c == L('-') && *p != L(']'))
603 		      {
604 #if _LIBC
605 			/* We have to find the collation sequence
606 			   value for C.  Collation sequence is nothing
607 			   we can regularly access.  The sequence
608 			   value is defined by the order in which the
609 			   definitions of the collation values for the
610 			   various characters appear in the source
611 			   file.  A strange concept, nowhere
612 			   documented.  */
613 			uint32_t fcollseq;
614 			uint32_t lcollseq;
615 			UCHAR cend = *p++;
616 
617 # ifdef WIDE_CHAR_VERSION
618 			/* Search in the `names' array for the characters.  */
619 			fcollseq = __collseq_table_lookup (collseq, fn);
620 			if (fcollseq == ~((uint32_t) 0))
621 			  /* XXX We don't know anything about the character
622 			     we are supposed to match.  This means we are
623 			     failing.  */
624 			  goto range_not_matched;
625 
626 			if (is_seqval)
627 			  lcollseq = cold;
628 			else
629 			  lcollseq = __collseq_table_lookup (collseq, cold);
630 # else
631 			fcollseq = collseq[fn];
632 			lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
633 # endif
634 
635 			is_seqval = false;
636 			if (cend == L('[') && *p == L('.'))
637 			  {
638 			    uint32_t nrules =
639 			      _NL_CURRENT_WORD (LC_COLLATE,
640 						_NL_COLLATE_NRULES);
641 			    const CHAR *startp = p;
642 			    size_t c1 = 0;
643 
644 			    while (1)
645 			      {
646 				c = *++p;
647 				if (c == L('.') && p[1] == L(']'))
648 				  {
649 				    p += 2;
650 				    break;
651 				  }
652 				if (c == '\0')
653 				  return FNM_NOMATCH;
654 				++c1;
655 			      }
656 
657 			    if (nrules == 0)
658 			      {
659 				/* There are no names defined in the
660 				   collation data.  Therefore we only
661 				   accept the trivial names consisting
662 				   of the character itself.  */
663 				if (c1 != 1)
664 				  return FNM_NOMATCH;
665 
666 				cend = startp[1];
667 			      }
668 			    else
669 			      {
670 				int32_t table_size;
671 				const int32_t *symb_table;
672 # ifdef WIDE_CHAR_VERSION
673 				char str[c1];
674 				size_t strcnt;
675 # else
676 #  define str (startp + 1)
677 # endif
678 				const unsigned char *extra;
679 				int32_t idx;
680 				int32_t elem;
681 				int32_t second;
682 				int32_t hash;
683 
684 # ifdef WIDE_CHAR_VERSION
685 				/* We have to convert the name to a single-byte
686 				   string.  This is possible since the names
687 				   consist of ASCII characters and the internal
688 				   representation is UCS4.  */
689 				for (strcnt = 0; strcnt < c1; ++strcnt)
690 				  str[strcnt] = startp[1 + strcnt];
691 # endif
692 
693 				table_size =
694 				  _NL_CURRENT_WORD (LC_COLLATE,
695 						    _NL_COLLATE_SYMB_HASH_SIZEMB);
696 				symb_table = (const int32_t *)
697 				  _NL_CURRENT (LC_COLLATE,
698 					       _NL_COLLATE_SYMB_TABLEMB);
699 				extra = (const unsigned char *)
700 				  _NL_CURRENT (LC_COLLATE,
701 					       _NL_COLLATE_SYMB_EXTRAMB);
702 
703 				/* Locate the character in the hashing
704                                    table.  */
705 				hash = elem_hash (str, c1);
706 
707 				idx = 0;
708 				elem = hash % table_size;
709 				second = hash % (table_size - 2);
710 				while (symb_table[2 * elem] != 0)
711 				  {
712 				/* First compare the hashing value.  */
713 				    if (symb_table[2 * elem] == hash
714 					&& (c1
715 					    == extra[symb_table[2 * elem + 1]])
716 					&& memcmp (str,
717 						   &extra[symb_table[2 * elem + 1]
718 							 + 1], c1) == 0)
719 				      {
720 					/* Yep, this is the entry.  */
721 					idx = symb_table[2 * elem + 1];
722 					idx += 1 + extra[idx];
723 					break;
724 				      }
725 
726 				    /* Next entry.  */
727 				    elem += second;
728 				  }
729 
730 				if (symb_table[2 * elem] != 0)
731 				  {
732 				    /* Compare the byte sequence but only if
733 				       this is not part of a range.  */
734 # ifdef WIDE_CHAR_VERSION
735 				    int32_t *wextra;
736 
737 				    idx += 1 + extra[idx];
738 				    /* Adjust for the alignment.  */
739 				    idx = (idx + 3) & ~4;
740 
741 				    wextra = (int32_t *) &extra[idx + 4];
742 # endif
743 				    /* Get the collation sequence value.  */
744 				    is_seqval = true;
745 # ifdef WIDE_CHAR_VERSION
746 				    cend = wextra[1 + wextra[idx]];
747 # else
748 				    /* Adjust for the alignment.  */
749 				    idx += 1 + extra[idx];
750 				    idx = (idx + 3) & ~4;
751 				    cend = *((int32_t *) &extra[idx]);
752 # endif
753 				  }
754 				else if (symb_table[2 * elem] != 0 && c1 == 1)
755 				  {
756 				    cend = str[0];
757 				    c = *p++;
758 				  }
759 				else
760 				  return FNM_NOMATCH;
761 			      }
762 # undef str
763 			  }
764 			else
765 			  {
766 			    if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
767 			      cend = *p++;
768 			    if (cend == L('\0'))
769 			      return FNM_NOMATCH;
770 			    cend = FOLD (cend);
771 			  }
772 
773 			/* XXX It is not entirely clear to me how to handle
774 			   characters which are not mentioned in the
775 			   collation specification.  */
776 			if (
777 # ifdef WIDE_CHAR_VERSION
778 			    lcollseq == 0xffffffff ||
779 # endif
780 			    lcollseq <= fcollseq)
781 			  {
782 			    /* We have to look at the upper bound.  */
783 			    uint32_t hcollseq;
784 
785 			    if (is_seqval)
786 			      hcollseq = cend;
787 			    else
788 			      {
789 # ifdef WIDE_CHAR_VERSION
790 				hcollseq =
791 				  __collseq_table_lookup (collseq, cend);
792 				if (hcollseq == ~((uint32_t) 0))
793 				  {
794 				    /* Hum, no information about the upper
795 				       bound.  The matching succeeds if the
796 				       lower bound is matched exactly.  */
797 				    if (lcollseq != fcollseq)
798 				      goto range_not_matched;
799 
800 				    goto matched;
801 				  }
802 # else
803 				hcollseq = collseq[cend];
804 # endif
805 			      }
806 
807 			    if (lcollseq <= hcollseq && fcollseq <= hcollseq)
808 			      goto matched;
809 			  }
810 # ifdef WIDE_CHAR_VERSION
811 		      range_not_matched:
812 # endif
813 #else
814 			/* We use a boring value comparison of the character
815 			   values.  This is better than comparing using
816 			   `strcoll' since the latter would have surprising
817 			   and sometimes fatal consequences.  */
818 			UCHAR cend = *p++;
819 
820 			if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
821 			  cend = *p++;
822 			if (cend == L('\0'))
823 			  return FNM_NOMATCH;
824 
825 			/* It is a range.  */
826 			if (cold <= fn && fn <= cend)
827 			  goto matched;
828 #endif
829 
830 			c = *p++;
831 		      }
832 		  }
833 
834 		if (c == L(']'))
835 		  break;
836 	      }
837 
838 	    if (!not)
839 	      return FNM_NOMATCH;
840 	    break;
841 
842 	  matched:
843 	    /* Skip the rest of the [...] that already matched.  */
844 	    do
845 	      {
846 	      ignore_next:
847 		c = *p++;
848 
849 		if (c == L('\0'))
850 		  /* [... (unterminated) loses.  */
851 		  return FNM_NOMATCH;
852 
853 		if (!(flags & FNM_NOESCAPE) && c == L('\\'))
854 		  {
855 		    if (*p == L('\0'))
856 		      return FNM_NOMATCH;
857 		    /* XXX 1003.2d11 is unclear if this is right.  */
858 		    ++p;
859 		  }
860 		else if (c == L('[') && *p == L(':'))
861 		  {
862 		    int c1 = 0;
863 		    const CHAR *startp = p;
864 
865 		    while (1)
866 		      {
867 			c = *++p;
868 			if (++c1 == CHAR_CLASS_MAX_LENGTH)
869 			  return FNM_NOMATCH;
870 
871 			if (*p == L(':') && p[1] == L(']'))
872 			  break;
873 
874 			if (c < L('a') || c >= L('z'))
875 			  {
876 			    p = startp;
877 			    goto ignore_next;
878 			  }
879 		      }
880 		    p += 2;
881 		    c = *p++;
882 		  }
883 		else if (c == L('[') && *p == L('='))
884 		  {
885 		    c = *++p;
886 		    if (c == L('\0'))
887 		      return FNM_NOMATCH;
888 		    c = *++p;
889 		    if (c != L('=') || p[1] != L(']'))
890 		      return FNM_NOMATCH;
891 		    p += 2;
892 		    c = *p++;
893 		  }
894 		else if (c == L('[') && *p == L('.'))
895 		  {
896 		    ++p;
897 		    while (1)
898 		      {
899 			c = *++p;
900 			if (c == '\0')
901 			  return FNM_NOMATCH;
902 
903 			if (*p == L('.') && p[1] == L(']'))
904 			  break;
905 		      }
906 		    p += 2;
907 		    c = *p++;
908 		  }
909 	      }
910 	    while (c != L(']'));
911 	    if (not)
912 	      return FNM_NOMATCH;
913 	  }
914 	  break;
915 
916 	case L('+'):
917 	case L('@'):
918 	case L('!'):
919 	  if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
920 	    {
921 	      int res;
922 
923 	      res = EXT (c, p, n, string_end, no_leading_period, flags);
924 	      if (res != -1)
925 		return res;
926 	    }
927 	  goto normal_match;
928 
929 	case L('/'):
930 	  if (NO_LEADING_PERIOD (flags))
931 	    {
932 	      if (n == string_end || c != (UCHAR) *n)
933 		return FNM_NOMATCH;
934 
935 	      new_no_leading_period = true;
936 	      break;
937 	    }
938 	  /* FALLTHROUGH */
939 	default:
940 	normal_match:
941 	  if (n == string_end || c != FOLD ((UCHAR) *n))
942 	    return FNM_NOMATCH;
943 	}
944 
945       no_leading_period = new_no_leading_period;
946       ++n;
947     }
948 
949   if (n == string_end)
950     return 0;
951 
952   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
953     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
954     return 0;
955 
956   return FNM_NOMATCH;
957 }
958 
959 
960 static const CHAR *
961 internal_function
END(const CHAR * pattern)962 END (const CHAR *pattern)
963 {
964   const CHAR *p = pattern;
965 
966   while (1)
967     if (*++p == L('\0'))
968       /* This is an invalid pattern.  */
969       return pattern;
970     else if (*p == L('['))
971       {
972 	/* Handle brackets special.  */
973 	if (posixly_correct == 0)
974 	  posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
975 
976 	/* Skip the not sign.  We have to recognize it because of a possibly
977 	   following ']'.  */
978 	if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
979 	  ++p;
980 	/* A leading ']' is recognized as such.  */
981 	if (*p == L(']'))
982 	  ++p;
983 	/* Skip over all characters of the list.  */
984 	while (*p != L(']'))
985 	  if (*p++ == L('\0'))
986 	    /* This is no valid pattern.  */
987 	    return pattern;
988       }
989     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
990 	      || *p == L('!')) && p[1] == L('('))
991       p = END (p + 1);
992     else if (*p == L(')'))
993       break;
994 
995   return p + 1;
996 }
997 
998 
999 static int
1000 internal_function
EXT(INT opt,const CHAR * pattern,const CHAR * string,const CHAR * string_end,bool no_leading_period,int flags)1001 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1002      bool no_leading_period, int flags)
1003 {
1004   const CHAR *startp;
1005   size_t level;
1006   struct patternlist
1007   {
1008     struct patternlist *next;
1009     CHAR str[1];
1010   } *list = NULL;
1011   struct patternlist **lastp = &list;
1012   size_t pattern_len = STRLEN (pattern);
1013   const CHAR *p;
1014   const CHAR *rs;
1015   enum { ALLOCA_LIMIT = 8000 };
1016 
1017   /* Parse the pattern.  Store the individual parts in the list.  */
1018   level = 0;
1019   for (startp = p = pattern + 1; ; ++p)
1020     if (*p == L('\0'))
1021       /* This is an invalid pattern.  */
1022       return -1;
1023     else if (*p == L('['))
1024       {
1025 	/* Handle brackets special.  */
1026 	if (posixly_correct == 0)
1027 	  posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1028 
1029 	/* Skip the not sign.  We have to recognize it because of a possibly
1030 	   following ']'.  */
1031 	if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1032 	  ++p;
1033 	/* A leading ']' is recognized as such.  */
1034 	if (*p == L(']'))
1035 	  ++p;
1036 	/* Skip over all characters of the list.  */
1037 	while (*p != L(']'))
1038 	  if (*p++ == L('\0'))
1039 	    /* This is no valid pattern.  */
1040 	    return -1;
1041       }
1042     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1043 	      || *p == L('!')) && p[1] == L('('))
1044       /* Remember the nesting level.  */
1045       ++level;
1046     else if (*p == L(')'))
1047       {
1048 	if (level-- == 0)
1049 	  {
1050 	    /* This means we found the end of the pattern.  */
1051 #define NEW_PATTERN \
1052 	    struct patternlist *newp;					      \
1053 	    size_t plen;						      \
1054 	    size_t plensize;						      \
1055 	    size_t newpsize;						      \
1056 									      \
1057 	    plen = (opt == L('?') || opt == L('@')			      \
1058 		    ? pattern_len					      \
1059 		    : p - startp + 1);					      \
1060 	    plensize = plen * sizeof (CHAR);				      \
1061 	    newpsize = offsetof (struct patternlist, str) + plensize;	      \
1062 	    if ((size_t) -1 / sizeof (CHAR) < plen			      \
1063 		|| newpsize < offsetof (struct patternlist, str)	      \
1064 		|| ALLOCA_LIMIT <= newpsize)				      \
1065 	      return -1;						      \
1066 	    newp = (struct patternlist *) alloca (newpsize);		      \
1067 	    *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1068 	    newp->next = NULL;						      \
1069 	    *lastp = newp;						      \
1070 	    lastp = &newp->next
1071 	    NEW_PATTERN;
1072 	    break;
1073 	  }
1074       }
1075     else if (*p == L('|'))
1076       {
1077 	if (level == 0)
1078 	  {
1079 	    NEW_PATTERN;
1080 	    startp = p + 1;
1081 	  }
1082       }
1083   assert (list != NULL);
1084   assert (p[-1] == L(')'));
1085 #undef NEW_PATTERN
1086 
1087   switch (opt)
1088     {
1089     case L('*'):
1090       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1091 	return 0;
1092       /* FALLTHROUGH */
1093 
1094     case L('+'):
1095       do
1096 	{
1097 	  for (rs = string; rs <= string_end; ++rs)
1098 	    /* First match the prefix with the current pattern with the
1099 	       current pattern.  */
1100 	    if (FCT (list->str, string, rs, no_leading_period,
1101 		     flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1102 		/* This was successful.  Now match the rest with the rest
1103 		   of the pattern.  */
1104 		&& (FCT (p, rs, string_end,
1105 			 rs == string
1106 			 ? no_leading_period
1107 			 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1108 			 flags & FNM_FILE_NAME
1109 			 ? flags : flags & ~FNM_PERIOD) == 0
1110 		    /* This didn't work.  Try the whole pattern.  */
1111 		    || (rs != string
1112 			&& FCT (pattern - 1, rs, string_end,
1113 				rs == string
1114 				? no_leading_period
1115 				: rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1116 				flags & FNM_FILE_NAME
1117 				? flags : flags & ~FNM_PERIOD) == 0)))
1118 	      /* It worked.  Signal success.  */
1119 	      return 0;
1120 	}
1121       while ((list = list->next) != NULL);
1122 
1123       /* None of the patterns lead to a match.  */
1124       return FNM_NOMATCH;
1125 
1126     case L('?'):
1127       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1128 	return 0;
1129       /* FALLTHROUGH */
1130 
1131     case L('@'):
1132       do
1133 	/* I cannot believe it but `strcat' is actually acceptable
1134 	   here.  Match the entire string with the prefix from the
1135 	   pattern list and the rest of the pattern following the
1136 	   pattern list.  */
1137 	if (FCT (STRCAT (list->str, p), string, string_end,
1138 		 no_leading_period,
1139 		 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1140 	  /* It worked.  Signal success.  */
1141 	  return 0;
1142       while ((list = list->next) != NULL);
1143 
1144       /* None of the patterns lead to a match.  */
1145       return FNM_NOMATCH;
1146 
1147     case L('!'):
1148       for (rs = string; rs <= string_end; ++rs)
1149 	{
1150 	  struct patternlist *runp;
1151 
1152 	  for (runp = list; runp != NULL; runp = runp->next)
1153 	    if (FCT (runp->str, string, rs,  no_leading_period,
1154 		     flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1155 	      break;
1156 
1157 	  /* If none of the patterns matched see whether the rest does.  */
1158 	  if (runp == NULL
1159 	      && (FCT (p, rs, string_end,
1160 		       rs == string
1161 		       ? no_leading_period
1162 		       : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1163 		       flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1164 		  == 0))
1165 	    /* This is successful.  */
1166 	    return 0;
1167 	}
1168 
1169       /* None of the patterns together with the rest of the pattern
1170 	 lead to a match.  */
1171       return FNM_NOMATCH;
1172 
1173     default:
1174       assert (! "Invalid extended matching operator");
1175       break;
1176     }
1177 
1178   return -1;
1179 }
1180 
1181 
1182 #undef FOLD
1183 #undef CHAR
1184 #undef UCHAR
1185 #undef INT
1186 #undef FCT
1187 #undef EXT
1188 #undef END
1189 #undef MEMPCPY
1190 #undef MEMCHR
1191 #undef STRCOLL
1192 #undef STRLEN
1193 #undef STRCAT
1194 #undef L
1195 #undef BTOWC
1196