1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006
2 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18 /* Match STRING against the file name pattern PATTERN, returning zero if
19 it matches, nonzero if not. */
20 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
21 const CHAR *string_end, bool no_leading_period, int flags)
22 internal_function;
23 static const CHAR *END (const CHAR *patternp) internal_function;
24
25 static int
26 internal_function
FCT(const CHAR * pattern,const CHAR * string,const CHAR * string_end,bool no_leading_period,int flags)27 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
28 bool no_leading_period, int flags)
29 {
30 register const CHAR *p = pattern, *n = string;
31 register UCHAR c;
32 #ifdef _LIBC
33 # if WIDE_CHAR_VERSION
34 const char *collseq = (const char *)
35 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
36 # else
37 const UCHAR *collseq = (const UCHAR *)
38 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
39 # endif
40 #endif
41
42 while ((c = *p++) != L_('\0'))
43 {
44 bool new_no_leading_period = false;
45 c = FOLD (c);
46
47 switch (c)
48 {
49 case L_('?'):
50 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
51 {
52 int res;
53
54 res = EXT (c, p, n, string_end, no_leading_period,
55 flags);
56 if (res != -1)
57 return res;
58 }
59
60 if (n == string_end)
61 return FNM_NOMATCH;
62 else if (*n == L_('/') && (flags & FNM_FILE_NAME))
63 return FNM_NOMATCH;
64 else if (*n == L_('.') && no_leading_period)
65 return FNM_NOMATCH;
66 break;
67
68 case L_('\\'):
69 if (!(flags & FNM_NOESCAPE))
70 {
71 c = *p++;
72 if (c == L_('\0'))
73 /* Trailing \ loses. */
74 return FNM_NOMATCH;
75 c = FOLD (c);
76 }
77 if (n == string_end || FOLD ((UCHAR) *n) != c)
78 return FNM_NOMATCH;
79 break;
80
81 case L_('*'):
82 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
83 {
84 int res;
85
86 res = EXT (c, p, n, string_end, no_leading_period,
87 flags);
88 if (res != -1)
89 return res;
90 }
91
92 if (n != string_end && *n == L_('.') && no_leading_period)
93 return FNM_NOMATCH;
94
95 for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
96 {
97 if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
98 {
99 const CHAR *endp = END (p);
100 if (endp != p)
101 {
102 /* This is a pattern. Skip over it. */
103 p = endp;
104 continue;
105 }
106 }
107
108 if (c == L_('?'))
109 {
110 /* A ? needs to match one character. */
111 if (n == string_end)
112 /* There isn't another character; no match. */
113 return FNM_NOMATCH;
114 else if (*n == L_('/')
115 && __builtin_expect (flags & FNM_FILE_NAME, 0))
116 /* A slash does not match a wildcard under
117 FNM_FILE_NAME. */
118 return FNM_NOMATCH;
119 else
120 /* One character of the string is consumed in matching
121 this ? wildcard, so *??? won't match if there are
122 less than three characters. */
123 ++n;
124 }
125 }
126
127 if (c == L_('\0'))
128 /* The wildcard(s) is/are the last element of the pattern.
129 If the name is a file name and contains another slash
130 this means it cannot match, unless the FNM_LEADING_DIR
131 flag is set. */
132 {
133 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
134
135 if (flags & FNM_FILE_NAME)
136 {
137 if (flags & FNM_LEADING_DIR)
138 result = 0;
139 else
140 {
141 if (MEMCHR (n, L_('/'), string_end - n) == NULL)
142 result = 0;
143 }
144 }
145
146 return result;
147 }
148 else
149 {
150 const CHAR *endp;
151
152 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
153 string_end - n);
154 if (endp == NULL)
155 endp = string_end;
156
157 if (c == L_('[')
158 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
159 && (c == L_('@') || c == L_('+') || c == L_('!'))
160 && *p == L_('(')))
161 {
162 int flags2 = ((flags & FNM_FILE_NAME)
163 ? flags : (flags & ~FNM_PERIOD));
164 bool no_leading_period2 = no_leading_period;
165
166 for (--p; n < endp; ++n, no_leading_period2 = false)
167 if (FCT (p, n, string_end, no_leading_period2, flags2)
168 == 0)
169 return 0;
170 }
171 else if (c == L_('/') && (flags & FNM_FILE_NAME))
172 {
173 while (n < string_end && *n != L_('/'))
174 ++n;
175 if (n < string_end && *n == L_('/')
176 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
177 == 0))
178 return 0;
179 }
180 else
181 {
182 int flags2 = ((flags & FNM_FILE_NAME)
183 ? flags : (flags & ~FNM_PERIOD));
184 int no_leading_period2 = no_leading_period;
185
186 if (c == L_('\\') && !(flags & FNM_NOESCAPE))
187 c = *p;
188 c = FOLD (c);
189 for (--p; n < endp; ++n, no_leading_period2 = false)
190 if (FOLD ((UCHAR) *n) == c
191 && (FCT (p, n, string_end, no_leading_period2, flags2)
192 == 0))
193 return 0;
194 }
195 }
196
197 /* If we come here no match is possible with the wildcard. */
198 return FNM_NOMATCH;
199
200 case L_('['):
201 {
202 /* Nonzero if the sense of the character class is inverted. */
203 register bool not;
204 CHAR cold;
205 UCHAR fn;
206
207 if (posixly_correct == 0)
208 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
209
210 if (n == string_end)
211 return FNM_NOMATCH;
212
213 if (*n == L_('.') && no_leading_period)
214 return FNM_NOMATCH;
215
216 if (*n == L_('/') && (flags & FNM_FILE_NAME))
217 /* `/' cannot be matched. */
218 return FNM_NOMATCH;
219
220 not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
221 if (not)
222 ++p;
223
224 fn = FOLD ((UCHAR) *n);
225
226 c = *p++;
227 for (;;)
228 {
229 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
230 {
231 if (*p == L_('\0'))
232 return FNM_NOMATCH;
233 c = FOLD ((UCHAR) *p);
234 ++p;
235
236 if (c == fn)
237 goto matched;
238 }
239 else if (c == L_('[') && *p == L_(':'))
240 {
241 /* Leave room for the null. */
242 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
243 size_t c1 = 0;
244 #if defined _LIBC || WIDE_CHAR_SUPPORT
245 wctype_t wt;
246 #endif
247 const CHAR *startp = p;
248
249 for (;;)
250 {
251 if (c1 == CHAR_CLASS_MAX_LENGTH)
252 /* The name is too long and therefore the pattern
253 is ill-formed. */
254 return FNM_NOMATCH;
255
256 c = *++p;
257 if (c == L_(':') && p[1] == L_(']'))
258 {
259 p += 2;
260 break;
261 }
262 if (c < L_('a') || c >= L_('z'))
263 {
264 /* This cannot possibly be a character class name.
265 Match it as a normal range. */
266 p = startp;
267 c = L_('[');
268 goto normal_bracket;
269 }
270 str[c1++] = c;
271 }
272 str[c1] = L_('\0');
273
274 #if defined _LIBC || WIDE_CHAR_SUPPORT
275 wt = IS_CHAR_CLASS (str);
276 if (wt == 0)
277 /* Invalid character class name. */
278 return FNM_NOMATCH;
279
280 # if defined _LIBC && ! WIDE_CHAR_VERSION
281 /* The following code is glibc specific but does
282 there a good job in speeding up the code since
283 we can avoid the btowc() call. */
284 if (_ISCTYPE ((UCHAR) *n, wt))
285 goto matched;
286 # else
287 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
288 goto matched;
289 # endif
290 #else
291 if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
292 || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
293 || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
294 || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
295 || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
296 || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
297 || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
298 || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
299 || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
300 || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
301 || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
302 || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
303 goto matched;
304 #endif
305 c = *p++;
306 }
307 #ifdef _LIBC
308 else if (c == L_('[') && *p == L_('='))
309 {
310 UCHAR str[1];
311 uint32_t nrules =
312 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
313 const CHAR *startp = p;
314
315 c = *++p;
316 if (c == L_('\0'))
317 {
318 p = startp;
319 c = L_('[');
320 goto normal_bracket;
321 }
322 str[0] = c;
323
324 c = *++p;
325 if (c != L_('=') || p[1] != L_(']'))
326 {
327 p = startp;
328 c = L_('[');
329 goto normal_bracket;
330 }
331 p += 2;
332
333 if (nrules == 0)
334 {
335 if ((UCHAR) *n == str[0])
336 goto matched;
337 }
338 else
339 {
340 const int32_t *table;
341 # if WIDE_CHAR_VERSION
342 const int32_t *weights;
343 const int32_t *extra;
344 # else
345 const unsigned char *weights;
346 const unsigned char *extra;
347 # endif
348 const int32_t *indirect;
349 int32_t idx;
350 const UCHAR *cp = (const UCHAR *) str;
351
352 /* This #include defines a local function! */
353 # if WIDE_CHAR_VERSION
354 # include <locale/weightwc.h>
355 # else
356 # include <locale/weight.h>
357 # endif
358
359 # if WIDE_CHAR_VERSION
360 table = (const int32_t *)
361 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
362 weights = (const int32_t *)
363 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
364 extra = (const int32_t *)
365 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
366 indirect = (const int32_t *)
367 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
368 # else
369 table = (const int32_t *)
370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371 weights = (const unsigned char *)
372 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
373 extra = (const unsigned char *)
374 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
375 indirect = (const int32_t *)
376 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
377 # endif
378
379 idx = findidx (&cp);
380 if (idx != 0)
381 {
382 /* We found a table entry. Now see whether the
383 character we are currently at has the same
384 equivalance class value. */
385 int len = weights[idx];
386 int32_t idx2;
387 const UCHAR *np = (const UCHAR *) n;
388
389 idx2 = findidx (&np);
390 if (idx2 != 0 && len == weights[idx2])
391 {
392 int cnt = 0;
393
394 while (cnt < len
395 && (weights[idx + 1 + cnt]
396 == weights[idx2 + 1 + cnt]))
397 ++cnt;
398
399 if (cnt == len)
400 goto matched;
401 }
402 }
403 }
404
405 c = *p++;
406 }
407 #endif
408 else if (c == L_('\0'))
409 /* [ (unterminated) loses. */
410 return FNM_NOMATCH;
411 else
412 {
413 bool is_range = false;
414
415 #ifdef _LIBC
416 bool is_seqval = false;
417
418 if (c == L_('[') && *p == L_('.'))
419 {
420 uint32_t nrules =
421 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
422 const CHAR *startp = p;
423 size_t c1 = 0;
424
425 while (1)
426 {
427 c = *++p;
428 if (c == L_('.') && p[1] == L_(']'))
429 {
430 p += 2;
431 break;
432 }
433 if (c == '\0')
434 return FNM_NOMATCH;
435 ++c1;
436 }
437
438 /* We have to handling the symbols differently in
439 ranges since then the collation sequence is
440 important. */
441 is_range = *p == L_('-') && p[1] != L_('\0');
442
443 if (nrules == 0)
444 {
445 /* There are no names defined in the collation
446 data. Therefore we only accept the trivial
447 names consisting of the character itself. */
448 if (c1 != 1)
449 return FNM_NOMATCH;
450
451 if (!is_range && *n == startp[1])
452 goto matched;
453
454 cold = startp[1];
455 c = *p++;
456 }
457 else
458 {
459 int32_t table_size;
460 const int32_t *symb_table;
461 # ifdef WIDE_CHAR_VERSION
462 char str[c1];
463 size_t strcnt;
464 # else
465 # define str (startp + 1)
466 # endif
467 const unsigned char *extra;
468 int32_t idx;
469 int32_t elem;
470 int32_t second;
471 int32_t hash;
472
473 # ifdef WIDE_CHAR_VERSION
474 /* We have to convert the name to a single-byte
475 string. This is possible since the names
476 consist of ASCII characters and the internal
477 representation is UCS4. */
478 for (strcnt = 0; strcnt < c1; ++strcnt)
479 str[strcnt] = startp[1 + strcnt];
480 # endif
481
482 table_size =
483 _NL_CURRENT_WORD (LC_COLLATE,
484 _NL_COLLATE_SYMB_HASH_SIZEMB);
485 symb_table = (const int32_t *)
486 _NL_CURRENT (LC_COLLATE,
487 _NL_COLLATE_SYMB_TABLEMB);
488 extra = (const unsigned char *)
489 _NL_CURRENT (LC_COLLATE,
490 _NL_COLLATE_SYMB_EXTRAMB);
491
492 /* Locate the character in the hashing table. */
493 hash = elem_hash (str, c1);
494
495 idx = 0;
496 elem = hash % table_size;
497 second = hash % (table_size - 2);
498 while (symb_table[2 * elem] != 0)
499 {
500 /* First compare the hashing value. */
501 if (symb_table[2 * elem] == hash
502 && c1 == extra[symb_table[2 * elem + 1]]
503 && memcmp (str,
504 &extra[symb_table[2 * elem + 1]
505 + 1], c1) == 0)
506 {
507 /* Yep, this is the entry. */
508 idx = symb_table[2 * elem + 1];
509 idx += 1 + extra[idx];
510 break;
511 }
512
513 /* Next entry. */
514 elem += second;
515 }
516
517 if (symb_table[2 * elem] != 0)
518 {
519 /* Compare the byte sequence but only if
520 this is not part of a range. */
521 # ifdef WIDE_CHAR_VERSION
522 int32_t *wextra;
523
524 idx += 1 + extra[idx];
525 /* Adjust for the alignment. */
526 idx = (idx + 3) & ~3;
527
528 wextra = (int32_t *) &extra[idx + 4];
529 # endif
530
531 if (! is_range)
532 {
533 # ifdef WIDE_CHAR_VERSION
534 for (c1 = 0;
535 (int32_t) c1 < wextra[idx];
536 ++c1)
537 if (n[c1] != wextra[1 + c1])
538 break;
539
540 if ((int32_t) c1 == wextra[idx])
541 goto matched;
542 # else
543 for (c1 = 0; c1 < extra[idx]; ++c1)
544 if (n[c1] != extra[1 + c1])
545 break;
546
547 if (c1 == extra[idx])
548 goto matched;
549 # endif
550 }
551
552 /* Get the collation sequence value. */
553 is_seqval = true;
554 # ifdef WIDE_CHAR_VERSION
555 cold = wextra[1 + wextra[idx]];
556 # else
557 /* Adjust for the alignment. */
558 idx += 1 + extra[idx];
559 idx = (idx + 3) & ~4;
560 cold = *((int32_t *) &extra[idx]);
561 # endif
562
563 c = *p++;
564 }
565 else if (c1 == 1)
566 {
567 /* No valid character. Match it as a
568 single byte. */
569 if (!is_range && *n == str[0])
570 goto matched;
571
572 cold = str[0];
573 c = *p++;
574 }
575 else
576 return FNM_NOMATCH;
577 }
578 }
579 else
580 # undef str
581 #endif
582 {
583 c = FOLD (c);
584 normal_bracket:
585
586 /* We have to handling the symbols differently in
587 ranges since then the collation sequence is
588 important. */
589 is_range = (*p == L_('-') && p[1] != L_('\0')
590 && p[1] != L_(']'));
591
592 if (!is_range && c == fn)
593 goto matched;
594
595 cold = c;
596 c = *p++;
597 }
598
599 if (c == L_('-') && *p != L_(']'))
600 {
601 #if _LIBC
602 /* We have to find the collation sequence
603 value for C. Collation sequence is nothing
604 we can regularly access. The sequence
605 value is defined by the order in which the
606 definitions of the collation values for the
607 various characters appear in the source
608 file. A strange concept, nowhere
609 documented. */
610 uint32_t fcollseq;
611 uint32_t lcollseq;
612 UCHAR cend = *p++;
613
614 # ifdef WIDE_CHAR_VERSION
615 /* Search in the `names' array for the characters. */
616 fcollseq = __collseq_table_lookup (collseq, fn);
617 if (fcollseq == ~((uint32_t) 0))
618 /* XXX We don't know anything about the character
619 we are supposed to match. This means we are
620 failing. */
621 goto range_not_matched;
622
623 if (is_seqval)
624 lcollseq = cold;
625 else
626 lcollseq = __collseq_table_lookup (collseq, cold);
627 # else
628 fcollseq = collseq[fn];
629 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
630 # endif
631
632 is_seqval = false;
633 if (cend == L_('[') && *p == L_('.'))
634 {
635 uint32_t nrules =
636 _NL_CURRENT_WORD (LC_COLLATE,
637 _NL_COLLATE_NRULES);
638 const CHAR *startp = p;
639 size_t c1 = 0;
640
641 while (1)
642 {
643 c = *++p;
644 if (c == L_('.') && p[1] == L_(']'))
645 {
646 p += 2;
647 break;
648 }
649 if (c == '\0')
650 return FNM_NOMATCH;
651 ++c1;
652 }
653
654 if (nrules == 0)
655 {
656 /* There are no names defined in the
657 collation data. Therefore we only
658 accept the trivial names consisting
659 of the character itself. */
660 if (c1 != 1)
661 return FNM_NOMATCH;
662
663 cend = startp[1];
664 }
665 else
666 {
667 int32_t table_size;
668 const int32_t *symb_table;
669 # ifdef WIDE_CHAR_VERSION
670 char str[c1];
671 size_t strcnt;
672 # else
673 # define str (startp + 1)
674 # endif
675 const unsigned char *extra;
676 int32_t idx;
677 int32_t elem;
678 int32_t second;
679 int32_t hash;
680
681 # ifdef WIDE_CHAR_VERSION
682 /* We have to convert the name to a single-byte
683 string. This is possible since the names
684 consist of ASCII characters and the internal
685 representation is UCS4. */
686 for (strcnt = 0; strcnt < c1; ++strcnt)
687 str[strcnt] = startp[1 + strcnt];
688 # endif
689
690 table_size =
691 _NL_CURRENT_WORD (LC_COLLATE,
692 _NL_COLLATE_SYMB_HASH_SIZEMB);
693 symb_table = (const int32_t *)
694 _NL_CURRENT (LC_COLLATE,
695 _NL_COLLATE_SYMB_TABLEMB);
696 extra = (const unsigned char *)
697 _NL_CURRENT (LC_COLLATE,
698 _NL_COLLATE_SYMB_EXTRAMB);
699
700 /* Locate the character in the hashing
701 table. */
702 hash = elem_hash (str, c1);
703
704 idx = 0;
705 elem = hash % table_size;
706 second = hash % (table_size - 2);
707 while (symb_table[2 * elem] != 0)
708 {
709 /* First compare the hashing value. */
710 if (symb_table[2 * elem] == hash
711 && (c1
712 == extra[symb_table[2 * elem + 1]])
713 && memcmp (str,
714 &extra[symb_table[2 * elem + 1]
715 + 1], c1) == 0)
716 {
717 /* Yep, this is the entry. */
718 idx = symb_table[2 * elem + 1];
719 idx += 1 + extra[idx];
720 break;
721 }
722
723 /* Next entry. */
724 elem += second;
725 }
726
727 if (symb_table[2 * elem] != 0)
728 {
729 /* Compare the byte sequence but only if
730 this is not part of a range. */
731 # ifdef WIDE_CHAR_VERSION
732 int32_t *wextra;
733
734 idx += 1 + extra[idx];
735 /* Adjust for the alignment. */
736 idx = (idx + 3) & ~4;
737
738 wextra = (int32_t *) &extra[idx + 4];
739 # endif
740 /* Get the collation sequence value. */
741 is_seqval = true;
742 # ifdef WIDE_CHAR_VERSION
743 cend = wextra[1 + wextra[idx]];
744 # else
745 /* Adjust for the alignment. */
746 idx += 1 + extra[idx];
747 idx = (idx + 3) & ~4;
748 cend = *((int32_t *) &extra[idx]);
749 # endif
750 }
751 else if (symb_table[2 * elem] != 0 && c1 == 1)
752 {
753 cend = str[0];
754 c = *p++;
755 }
756 else
757 return FNM_NOMATCH;
758 }
759 # undef str
760 }
761 else
762 {
763 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
764 cend = *p++;
765 if (cend == L_('\0'))
766 return FNM_NOMATCH;
767 cend = FOLD (cend);
768 }
769
770 /* XXX It is not entirely clear to me how to handle
771 characters which are not mentioned in the
772 collation specification. */
773 if (
774 # ifdef WIDE_CHAR_VERSION
775 lcollseq == 0xffffffff ||
776 # endif
777 lcollseq <= fcollseq)
778 {
779 /* We have to look at the upper bound. */
780 uint32_t hcollseq;
781
782 if (is_seqval)
783 hcollseq = cend;
784 else
785 {
786 # ifdef WIDE_CHAR_VERSION
787 hcollseq =
788 __collseq_table_lookup (collseq, cend);
789 if (hcollseq == ~((uint32_t) 0))
790 {
791 /* Hum, no information about the upper
792 bound. The matching succeeds if the
793 lower bound is matched exactly. */
794 if (lcollseq != fcollseq)
795 goto range_not_matched;
796
797 goto matched;
798 }
799 # else
800 hcollseq = collseq[cend];
801 # endif
802 }
803
804 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
805 goto matched;
806 }
807 # ifdef WIDE_CHAR_VERSION
808 range_not_matched:
809 # endif
810 #else
811 /* We use a boring value comparison of the character
812 values. This is better than comparing using
813 `strcoll' since the latter would have surprising
814 and sometimes fatal consequences. */
815 UCHAR cend = *p++;
816
817 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
818 cend = *p++;
819 if (cend == L_('\0'))
820 return FNM_NOMATCH;
821
822 /* It is a range. */
823 if (cold <= fn && fn <= cend)
824 goto matched;
825 #endif
826
827 c = *p++;
828 }
829 }
830
831 if (c == L_(']'))
832 break;
833 }
834
835 if (!not)
836 return FNM_NOMATCH;
837 break;
838
839 matched:
840 /* Skip the rest of the [...] that already matched. */
841 do
842 {
843 ignore_next:
844 c = *p++;
845
846 if (c == L_('\0'))
847 /* [... (unterminated) loses. */
848 return FNM_NOMATCH;
849
850 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
851 {
852 if (*p == L_('\0'))
853 return FNM_NOMATCH;
854 /* XXX 1003.2d11 is unclear if this is right. */
855 ++p;
856 }
857 else if (c == L_('[') && *p == L_(':'))
858 {
859 int c1 = 0;
860 const CHAR *startp = p;
861
862 while (1)
863 {
864 c = *++p;
865 if (++c1 == CHAR_CLASS_MAX_LENGTH)
866 return FNM_NOMATCH;
867
868 if (*p == L_(':') && p[1] == L_(']'))
869 break;
870
871 if (c < L_('a') || c >= L_('z'))
872 {
873 p = startp;
874 goto ignore_next;
875 }
876 }
877 p += 2;
878 c = *p++;
879 }
880 else if (c == L_('[') && *p == L_('='))
881 {
882 c = *++p;
883 if (c == L_('\0'))
884 return FNM_NOMATCH;
885 c = *++p;
886 if (c != L_('=') || p[1] != L_(']'))
887 return FNM_NOMATCH;
888 p += 2;
889 c = *p++;
890 }
891 else if (c == L_('[') && *p == L_('.'))
892 {
893 ++p;
894 while (1)
895 {
896 c = *++p;
897 if (c == '\0')
898 return FNM_NOMATCH;
899
900 if (*p == L_('.') && p[1] == L_(']'))
901 break;
902 }
903 p += 2;
904 c = *p++;
905 }
906 }
907 while (c != L_(']'));
908 if (not)
909 return FNM_NOMATCH;
910 }
911 break;
912
913 case L_('+'):
914 case L_('@'):
915 case L_('!'):
916 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
917 {
918 int res;
919
920 res = EXT (c, p, n, string_end, no_leading_period, flags);
921 if (res != -1)
922 return res;
923 }
924 goto normal_match;
925
926 case L_('/'):
927 if (NO_LEADING_PERIOD (flags))
928 {
929 if (n == string_end || c != (UCHAR) *n)
930 return FNM_NOMATCH;
931
932 new_no_leading_period = true;
933 break;
934 }
935 /* FALLTHROUGH */
936 default:
937 normal_match:
938 if (n == string_end || c != FOLD ((UCHAR) *n))
939 return FNM_NOMATCH;
940 }
941
942 no_leading_period = new_no_leading_period;
943 ++n;
944 }
945
946 if (n == string_end)
947 return 0;
948
949 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
950 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
951 return 0;
952
953 return FNM_NOMATCH;
954 }
955
956
957 static const CHAR *
958 internal_function
END(const CHAR * pattern)959 END (const CHAR *pattern)
960 {
961 const CHAR *p = pattern;
962
963 while (1)
964 if (*++p == L_('\0'))
965 /* This is an invalid pattern. */
966 return pattern;
967 else if (*p == L_('['))
968 {
969 /* Handle brackets special. */
970 if (posixly_correct == 0)
971 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
972
973 /* Skip the not sign. We have to recognize it because of a possibly
974 following ']'. */
975 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
976 ++p;
977 /* A leading ']' is recognized as such. */
978 if (*p == L_(']'))
979 ++p;
980 /* Skip over all characters of the list. */
981 while (*p != L_(']'))
982 if (*p++ == L_('\0'))
983 /* This is no valid pattern. */
984 return pattern;
985 }
986 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
987 || *p == L_('!')) && p[1] == L_('('))
988 p = END (p + 1);
989 else if (*p == L_(')'))
990 break;
991
992 return p + 1;
993 }
994
995
996 static int
997 internal_function
EXT(INT opt,const CHAR * pattern,const CHAR * string,const CHAR * string_end,bool no_leading_period,int flags)998 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
999 bool no_leading_period, int flags)
1000 {
1001 const CHAR *startp;
1002 size_t level;
1003 struct patternlist
1004 {
1005 struct patternlist *next;
1006 int malloced;
1007 CHAR str[1];
1008 } *list = NULL;
1009 struct patternlist **lastp = &list;
1010 size_t pattern_len = STRLEN (pattern);
1011 const CHAR *p;
1012 const CHAR *rs;
1013 #if HAVE_ALLOCA || defined _LIBC
1014 enum { ALLOCA_LIMIT = 8000 };
1015 #else
1016 enum { ALLOCA_LIMIT = 0 };
1017 #endif
1018 int retval;
1019
1020 /* Parse the pattern. Store the individual parts in the list. */
1021 level = 0;
1022 for (startp = p = pattern + 1; ; ++p)
1023 if (*p == L_('\0'))
1024 /* This is an invalid pattern. */
1025 goto failed;
1026 else if (*p == L_('['))
1027 {
1028 /* Handle brackets special. */
1029 if (posixly_correct == 0)
1030 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1031
1032 /* Skip the not sign. We have to recognize it because of a possibly
1033 following ']'. */
1034 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1035 ++p;
1036 /* A leading ']' is recognized as such. */
1037 if (*p == L_(']'))
1038 ++p;
1039 /* Skip over all characters of the list. */
1040 while (*p != L_(']'))
1041 if (*p++ == L_('\0'))
1042 /* This is no valid pattern. */
1043 goto failed;
1044 }
1045 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1046 || *p == L_('!')) && p[1] == L_('('))
1047 /* Remember the nesting level. */
1048 ++level;
1049 else if (*p == L_(')'))
1050 {
1051 if (level-- == 0)
1052 {
1053 /* This means we found the end of the pattern. */
1054 #define NEW_PATTERN \
1055 struct patternlist *newp; \
1056 size_t plen; \
1057 size_t plensize; \
1058 size_t newpsize; \
1059 \
1060 plen = (opt == L_('?') || opt == L_('@') \
1061 ? pattern_len \
1062 : p - startp + 1); \
1063 plensize = plen * sizeof (CHAR); \
1064 newpsize = offsetof (struct patternlist, str) + plensize; \
1065 if ((size_t) -1 / sizeof (CHAR) < plen \
1066 || newpsize < offsetof (struct patternlist, str)) \
1067 goto failed; \
1068 if (newpsize < ALLOCA_LIMIT) \
1069 { \
1070 newp = (struct patternlist *) alloca (newpsize); \
1071 newp->malloced = 0; \
1072 } \
1073 else \
1074 { \
1075 newp = (struct patternlist *) malloc (newpsize); \
1076 if (!newp) \
1077 goto failed; \
1078 newp->malloced = 1; \
1079 } \
1080 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \
1081 newp->next = NULL; \
1082 *lastp = newp; \
1083 lastp = &newp->next
1084 NEW_PATTERN;
1085 break;
1086 }
1087 }
1088 else if (*p == L_('|'))
1089 {
1090 if (level == 0)
1091 {
1092 NEW_PATTERN;
1093 startp = p + 1;
1094 }
1095 }
1096 assert (list != NULL);
1097 assert (p[-1] == L_(')'));
1098 #undef NEW_PATTERN
1099
1100 switch (opt)
1101 {
1102 case L_('*'):
1103 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1104 {
1105 retval = 0;
1106 goto done;
1107 }
1108 /* FALLTHROUGH */
1109
1110 case L_('+'):
1111 do
1112 {
1113 struct patternlist *next;
1114
1115 for (rs = string; rs <= string_end; ++rs)
1116 /* First match the prefix with the current pattern with the
1117 current pattern. */
1118 if (FCT (list->str, string, rs, no_leading_period,
1119 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1120 /* This was successful. Now match the rest with the rest
1121 of the pattern. */
1122 && (FCT (p, rs, string_end,
1123 rs == string
1124 ? no_leading_period
1125 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1126 flags & FNM_FILE_NAME
1127 ? flags : flags & ~FNM_PERIOD) == 0
1128 /* This didn't work. Try the whole pattern. */
1129 || (rs != string
1130 && FCT (pattern - 1, rs, string_end,
1131 rs == string
1132 ? no_leading_period
1133 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1134 flags & FNM_FILE_NAME
1135 ? flags : flags & ~FNM_PERIOD) == 0)))
1136 {
1137 /* It worked. Signal success. */
1138 retval = 0;
1139 goto done;
1140 }
1141
1142 next = list->next;
1143 if (list->malloced)
1144 free (list);
1145 list = next;
1146 }
1147 while (list != NULL);
1148
1149 /* None of the patterns lead to a match. */
1150 return FNM_NOMATCH;
1151
1152 case L_('?'):
1153 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1154 {
1155 retval = 0;
1156 goto done;
1157 }
1158 /* FALLTHROUGH */
1159
1160 case L_('@'):
1161 do
1162 {
1163 struct patternlist *next;
1164
1165 /* I cannot believe it but `strcat' is actually acceptable
1166 here. Match the entire string with the prefix from the
1167 pattern list and the rest of the pattern following the
1168 pattern list. */
1169 if (FCT (STRCAT (list->str, p), string, string_end,
1170 no_leading_period,
1171 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1172 {
1173 /* It worked. Signal success. */
1174 retval = 0;
1175 goto done;
1176 }
1177
1178 next = list->next;
1179 if (list->malloced)
1180 free (list);
1181 list = next;
1182 }
1183 while (list != NULL);
1184
1185 /* None of the patterns lead to a match. */
1186 return FNM_NOMATCH;
1187
1188 case L_('!'):
1189 for (rs = string; rs <= string_end; ++rs)
1190 {
1191 struct patternlist *runp;
1192
1193 for (runp = list; runp != NULL; runp = runp->next)
1194 if (FCT (runp->str, string, rs, no_leading_period,
1195 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1196 break;
1197
1198 /* If none of the patterns matched see whether the rest does. */
1199 if (runp == NULL
1200 && (FCT (p, rs, string_end,
1201 rs == string
1202 ? no_leading_period
1203 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1204 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1205 == 0))
1206 {
1207 /* This is successful. */
1208 retval = 0;
1209 goto done;
1210 }
1211 }
1212
1213 /* None of the patterns together with the rest of the pattern
1214 lead to a match. */
1215 retval = FNM_NOMATCH;
1216 goto done;
1217
1218 default:
1219 assert (! "Invalid extended matching operator");
1220 break;
1221 }
1222
1223 failed:
1224 retval = -1;
1225 done:
1226 while (list != NULL)
1227 {
1228 struct patternlist *next = list->next;
1229
1230 if (list->malloced)
1231 free (list);
1232 list = next;
1233 }
1234 return retval;
1235 }
1236
1237
1238 #undef FOLD
1239 #undef CHAR
1240 #undef UCHAR
1241 #undef INT
1242 #undef FCT
1243 #undef EXT
1244 #undef END
1245 #undef MEMPCPY
1246 #undef MEMCHR
1247 #undef STRCOLL
1248 #undef STRLEN
1249 #undef STRCAT
1250 #undef L_
1251 #undef BTOWC
1252