1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005
2 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17 #include <sys/cdefs.h>
18 __RCSID("$NetBSD: fnmatch_loop.c,v 1.2 2016/05/17 14:00:09 christos Exp $");
19
20
/* Match STRING against the file name pattern PATTERN, returning zero if
   it matches, nonzero if not.  */

/* Forward declarations of the two helpers that FCT calls and that are
   defined further down in this file.

   EXT is called with OPT set to the operator character and PATTERN
   pointing at the '(' that follows it; it returns 0, FNM_NOMATCH, or -1
   when the construct cannot be parsed as an extended pattern.

   END is called with a pointer to a '(' and returns the position just
   past the matching ')', or its argument unchanged when the group is
   not terminated.  */
static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
                const CHAR *string_end, bool no_leading_period, int flags)
     internal_function;
static const CHAR *END (const CHAR *patternp) internal_function;
27
28 static int
29 internal_function
FCT(const CHAR * pattern,const CHAR * string,const CHAR * string_end,bool no_leading_period,int flags)30 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
31 bool no_leading_period, int flags)
32 {
33 register const CHAR *p = pattern, *n = string;
34 register UCHAR c;
35 #ifdef _LIBC
36 # if WIDE_CHAR_VERSION
37 const char *collseq = (const char *)
38 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
39 # else
40 const UCHAR *collseq = (const UCHAR *)
41 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
42 # endif
43 #endif
44
45 while ((c = *p++) != L('\0'))
46 {
47 bool new_no_leading_period = false;
48 c = FOLD (c);
49
50 switch (c)
51 {
52 case L('?'):
53 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
54 {
55 int res;
56
57 res = EXT (c, p, n, string_end, no_leading_period,
58 flags);
59 if (res != -1)
60 return res;
61 }
62
63 if (n == string_end)
64 return FNM_NOMATCH;
65 else if (*n == L('/') && (flags & FNM_FILE_NAME))
66 return FNM_NOMATCH;
67 else if (*n == L('.') && no_leading_period)
68 return FNM_NOMATCH;
69 break;
70
71 case L('\\'):
72 if (!(flags & FNM_NOESCAPE))
73 {
74 c = *p++;
75 if (c == L('\0'))
76 /* Trailing \ loses. */
77 return FNM_NOMATCH;
78 c = FOLD (c);
79 }
80 if (n == string_end || FOLD ((UCHAR) *n) != c)
81 return FNM_NOMATCH;
82 break;
83
84 case L('*'):
85 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
86 {
87 int res;
88
89 res = EXT (c, p, n, string_end, no_leading_period,
90 flags);
91 if (res != -1)
92 return res;
93 }
94
95 if (n != string_end && *n == L('.') && no_leading_period)
96 return FNM_NOMATCH;
97
98 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
99 {
100 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
101 {
102 const CHAR *endp = END (p);
103 if (endp != p)
104 {
105 /* This is a pattern. Skip over it. */
106 p = endp;
107 continue;
108 }
109 }
110
111 if (c == L('?'))
112 {
113 /* A ? needs to match one character. */
114 if (n == string_end)
115 /* There isn't another character; no match. */
116 return FNM_NOMATCH;
117 else if (*n == L('/')
118 && __builtin_expect (flags & FNM_FILE_NAME, 0))
119 /* A slash does not match a wildcard under
120 FNM_FILE_NAME. */
121 return FNM_NOMATCH;
122 else
123 /* One character of the string is consumed in matching
124 this ? wildcard, so *??? won't match if there are
125 less than three characters. */
126 ++n;
127 }
128 }
129
130 if (c == L('\0'))
131 /* The wildcard(s) is/are the last element of the pattern.
132 If the name is a file name and contains another slash
133 this means it cannot match, unless the FNM_LEADING_DIR
134 flag is set. */
135 {
136 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
137
138 if (flags & FNM_FILE_NAME)
139 {
140 if (flags & FNM_LEADING_DIR)
141 result = 0;
142 else
143 {
144 if (MEMCHR (n, L('/'), string_end - n) == NULL)
145 result = 0;
146 }
147 }
148
149 return result;
150 }
151 else
152 {
153 const CHAR *endp;
154
155 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
156 string_end - n);
157 if (endp == NULL)
158 endp = string_end;
159
160 if (c == L('[')
161 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
162 && (c == L('@') || c == L('+') || c == L('!'))
163 && *p == L('(')))
164 {
165 int flags2 = ((flags & FNM_FILE_NAME)
166 ? flags : (flags & ~FNM_PERIOD));
167 bool no_leading_period2 = no_leading_period;
168
169 for (--p; n < endp; ++n, no_leading_period2 = false)
170 if (FCT (p, n, string_end, no_leading_period2, flags2)
171 == 0)
172 return 0;
173 }
174 else if (c == L('/') && (flags & FNM_FILE_NAME))
175 {
176 while (n < string_end && *n != L('/'))
177 ++n;
178 if (n < string_end && *n == L('/')
179 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
180 == 0))
181 return 0;
182 }
183 else
184 {
185 int flags2 = ((flags & FNM_FILE_NAME)
186 ? flags : (flags & ~FNM_PERIOD));
187 int no_leading_period2 = no_leading_period;
188
189 if (c == L('\\') && !(flags & FNM_NOESCAPE))
190 c = *p;
191 c = FOLD (c);
192 for (--p; n < endp; ++n, no_leading_period2 = false)
193 if (FOLD ((UCHAR) *n) == c
194 && (FCT (p, n, string_end, no_leading_period2, flags2)
195 == 0))
196 return 0;
197 }
198 }
199
200 /* If we come here no match is possible with the wildcard. */
201 return FNM_NOMATCH;
202
203 case L('['):
204 {
205 /* Nonzero if the sense of the character class is inverted. */
206 register bool not;
207 CHAR cold;
208 UCHAR fn;
209
210 if (posixly_correct == 0)
211 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
212
213 if (n == string_end)
214 return FNM_NOMATCH;
215
216 if (*n == L('.') && no_leading_period)
217 return FNM_NOMATCH;
218
219 if (*n == L('/') && (flags & FNM_FILE_NAME))
220 /* `/' cannot be matched. */
221 return FNM_NOMATCH;
222
223 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
224 if (not)
225 ++p;
226
227 fn = FOLD ((UCHAR) *n);
228
229 c = *p++;
230 for (;;)
231 {
232 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
233 {
234 if (*p == L('\0'))
235 return FNM_NOMATCH;
236 c = FOLD ((UCHAR) *p);
237 ++p;
238
239 if (c == fn)
240 goto matched;
241 }
242 else if (c == L('[') && *p == L(':'))
243 {
244 /* Leave room for the null. */
245 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
246 size_t c1 = 0;
247 #if defined _LIBC || WIDE_CHAR_SUPPORT
248 wctype_t wt;
249 #endif
250 const CHAR *startp = p;
251
252 for (;;)
253 {
254 if (c1 == CHAR_CLASS_MAX_LENGTH)
255 /* The name is too long and therefore the pattern
256 is ill-formed. */
257 return FNM_NOMATCH;
258
259 c = *++p;
260 if (c == L(':') && p[1] == L(']'))
261 {
262 p += 2;
263 break;
264 }
265 if (c < L('a') || c >= L('z'))
266 {
267 /* This cannot possibly be a character class name.
268 Match it as a normal range. */
269 p = startp;
270 c = L('[');
271 goto normal_bracket;
272 }
273 str[c1++] = c;
274 }
275 str[c1] = L('\0');
276
277 #if defined _LIBC || WIDE_CHAR_SUPPORT
278 wt = IS_CHAR_CLASS (str);
279 if (wt == 0)
280 /* Invalid character class name. */
281 return FNM_NOMATCH;
282
283 # if defined _LIBC && ! WIDE_CHAR_VERSION
284 /* The following code is glibc specific but does
285 there a good job in speeding up the code since
286 we can avoid the btowc() call. */
287 if (_ISCTYPE ((UCHAR) *n, wt))
288 goto matched;
289 # else
290 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
291 goto matched;
292 # endif
293 #else
294 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
295 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
296 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
297 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
298 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
299 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
300 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
301 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
302 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
303 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
304 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
305 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
306 goto matched;
307 #endif
308 c = *p++;
309 }
310 #ifdef _LIBC
311 else if (c == L('[') && *p == L('='))
312 {
313 UCHAR str[1];
314 uint32_t nrules =
315 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
316 const CHAR *startp = p;
317
318 c = *++p;
319 if (c == L('\0'))
320 {
321 p = startp;
322 c = L('[');
323 goto normal_bracket;
324 }
325 str[0] = c;
326
327 c = *++p;
328 if (c != L('=') || p[1] != L(']'))
329 {
330 p = startp;
331 c = L('[');
332 goto normal_bracket;
333 }
334 p += 2;
335
336 if (nrules == 0)
337 {
338 if ((UCHAR) *n == str[0])
339 goto matched;
340 }
341 else
342 {
343 const int32_t *table;
344 # if WIDE_CHAR_VERSION
345 const int32_t *weights;
346 const int32_t *extra;
347 # else
348 const unsigned char *weights;
349 const unsigned char *extra;
350 # endif
351 const int32_t *indirect;
352 int32_t idx;
353 const UCHAR *cp = (const UCHAR *) str;
354
355 /* This #include defines a local function! */
356 # if WIDE_CHAR_VERSION
357 # include <locale/weightwc.h>
358 # else
359 # include <locale/weight.h>
360 # endif
361
362 # if WIDE_CHAR_VERSION
363 table = (const int32_t *)
364 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
365 weights = (const int32_t *)
366 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
367 extra = (const int32_t *)
368 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
369 indirect = (const int32_t *)
370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
371 # else
372 table = (const int32_t *)
373 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
374 weights = (const unsigned char *)
375 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
376 extra = (const unsigned char *)
377 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
378 indirect = (const int32_t *)
379 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
380 # endif
381
382 idx = findidx (&cp);
383 if (idx != 0)
384 {
385 /* We found a table entry. Now see whether the
386 character we are currently at has the same
387 equivalance class value. */
388 int len = weights[idx];
389 int32_t idx2;
390 const UCHAR *np = (const UCHAR *) n;
391
392 idx2 = findidx (&np);
393 if (idx2 != 0 && len == weights[idx2])
394 {
395 int cnt = 0;
396
397 while (cnt < len
398 && (weights[idx + 1 + cnt]
399 == weights[idx2 + 1 + cnt]))
400 ++cnt;
401
402 if (cnt == len)
403 goto matched;
404 }
405 }
406 }
407
408 c = *p++;
409 }
410 #endif
411 else if (c == L('\0'))
412 /* [ (unterminated) loses. */
413 return FNM_NOMATCH;
414 else
415 {
416 bool is_range = false;
417
418 #ifdef _LIBC
419 bool is_seqval = false;
420
421 if (c == L('[') && *p == L('.'))
422 {
423 uint32_t nrules =
424 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
425 const CHAR *startp = p;
426 size_t c1 = 0;
427
428 while (1)
429 {
430 c = *++p;
431 if (c == L('.') && p[1] == L(']'))
432 {
433 p += 2;
434 break;
435 }
436 if (c == '\0')
437 return FNM_NOMATCH;
438 ++c1;
439 }
440
441 /* We have to handling the symbols differently in
442 ranges since then the collation sequence is
443 important. */
444 is_range = *p == L('-') && p[1] != L('\0');
445
446 if (nrules == 0)
447 {
448 /* There are no names defined in the collation
449 data. Therefore we only accept the trivial
450 names consisting of the character itself. */
451 if (c1 != 1)
452 return FNM_NOMATCH;
453
454 if (!is_range && *n == startp[1])
455 goto matched;
456
457 cold = startp[1];
458 c = *p++;
459 }
460 else
461 {
462 int32_t table_size;
463 const int32_t *symb_table;
464 # ifdef WIDE_CHAR_VERSION
465 char str[c1];
466 size_t strcnt;
467 # else
468 # define str (startp + 1)
469 # endif
470 const unsigned char *extra;
471 int32_t idx;
472 int32_t elem;
473 int32_t second;
474 int32_t hash;
475
476 # ifdef WIDE_CHAR_VERSION
477 /* We have to convert the name to a single-byte
478 string. This is possible since the names
479 consist of ASCII characters and the internal
480 representation is UCS4. */
481 for (strcnt = 0; strcnt < c1; ++strcnt)
482 str[strcnt] = startp[1 + strcnt];
483 # endif
484
485 table_size =
486 _NL_CURRENT_WORD (LC_COLLATE,
487 _NL_COLLATE_SYMB_HASH_SIZEMB);
488 symb_table = (const int32_t *)
489 _NL_CURRENT (LC_COLLATE,
490 _NL_COLLATE_SYMB_TABLEMB);
491 extra = (const unsigned char *)
492 _NL_CURRENT (LC_COLLATE,
493 _NL_COLLATE_SYMB_EXTRAMB);
494
495 /* Locate the character in the hashing table. */
496 hash = elem_hash (str, c1);
497
498 idx = 0;
499 elem = hash % table_size;
500 second = hash % (table_size - 2);
501 while (symb_table[2 * elem] != 0)
502 {
503 /* First compare the hashing value. */
504 if (symb_table[2 * elem] == hash
505 && c1 == extra[symb_table[2 * elem + 1]]
506 && memcmp (str,
507 &extra[symb_table[2 * elem + 1]
508 + 1], c1) == 0)
509 {
510 /* Yep, this is the entry. */
511 idx = symb_table[2 * elem + 1];
512 idx += 1 + extra[idx];
513 break;
514 }
515
516 /* Next entry. */
517 elem += second;
518 }
519
520 if (symb_table[2 * elem] != 0)
521 {
522 /* Compare the byte sequence but only if
523 this is not part of a range. */
524 # ifdef WIDE_CHAR_VERSION
525 int32_t *wextra;
526
527 idx += 1 + extra[idx];
528 /* Adjust for the alignment. */
529 idx = (idx + 3) & ~3;
530
531 wextra = (int32_t *) &extra[idx + 4];
532 # endif
533
534 if (! is_range)
535 {
536 # ifdef WIDE_CHAR_VERSION
537 for (c1 = 0;
538 (int32_t) c1 < wextra[idx];
539 ++c1)
540 if (n[c1] != wextra[1 + c1])
541 break;
542
543 if ((int32_t) c1 == wextra[idx])
544 goto matched;
545 # else
546 for (c1 = 0; c1 < extra[idx]; ++c1)
547 if (n[c1] != extra[1 + c1])
548 break;
549
550 if (c1 == extra[idx])
551 goto matched;
552 # endif
553 }
554
555 /* Get the collation sequence value. */
556 is_seqval = true;
557 # ifdef WIDE_CHAR_VERSION
558 cold = wextra[1 + wextra[idx]];
559 # else
560 /* Adjust for the alignment. */
561 idx += 1 + extra[idx];
562 idx = (idx + 3) & ~4;
563 cold = *((int32_t *) &extra[idx]);
564 # endif
565
566 c = *p++;
567 }
568 else if (c1 == 1)
569 {
570 /* No valid character. Match it as a
571 single byte. */
572 if (!is_range && *n == str[0])
573 goto matched;
574
575 cold = str[0];
576 c = *p++;
577 }
578 else
579 return FNM_NOMATCH;
580 }
581 }
582 else
583 # undef str
584 #endif
585 {
586 c = FOLD (c);
587 normal_bracket:
588
589 /* We have to handling the symbols differently in
590 ranges since then the collation sequence is
591 important. */
592 is_range = (*p == L('-') && p[1] != L('\0')
593 && p[1] != L(']'));
594
595 if (!is_range && c == fn)
596 goto matched;
597
598 cold = c;
599 c = *p++;
600 }
601
602 if (c == L('-') && *p != L(']'))
603 {
604 #if _LIBC
605 /* We have to find the collation sequence
606 value for C. Collation sequence is nothing
607 we can regularly access. The sequence
608 value is defined by the order in which the
609 definitions of the collation values for the
610 various characters appear in the source
611 file. A strange concept, nowhere
612 documented. */
613 uint32_t fcollseq;
614 uint32_t lcollseq;
615 UCHAR cend = *p++;
616
617 # ifdef WIDE_CHAR_VERSION
618 /* Search in the `names' array for the characters. */
619 fcollseq = __collseq_table_lookup (collseq, fn);
620 if (fcollseq == ~((uint32_t) 0))
621 /* XXX We don't know anything about the character
622 we are supposed to match. This means we are
623 failing. */
624 goto range_not_matched;
625
626 if (is_seqval)
627 lcollseq = cold;
628 else
629 lcollseq = __collseq_table_lookup (collseq, cold);
630 # else
631 fcollseq = collseq[fn];
632 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
633 # endif
634
635 is_seqval = false;
636 if (cend == L('[') && *p == L('.'))
637 {
638 uint32_t nrules =
639 _NL_CURRENT_WORD (LC_COLLATE,
640 _NL_COLLATE_NRULES);
641 const CHAR *startp = p;
642 size_t c1 = 0;
643
644 while (1)
645 {
646 c = *++p;
647 if (c == L('.') && p[1] == L(']'))
648 {
649 p += 2;
650 break;
651 }
652 if (c == '\0')
653 return FNM_NOMATCH;
654 ++c1;
655 }
656
657 if (nrules == 0)
658 {
659 /* There are no names defined in the
660 collation data. Therefore we only
661 accept the trivial names consisting
662 of the character itself. */
663 if (c1 != 1)
664 return FNM_NOMATCH;
665
666 cend = startp[1];
667 }
668 else
669 {
670 int32_t table_size;
671 const int32_t *symb_table;
672 # ifdef WIDE_CHAR_VERSION
673 char str[c1];
674 size_t strcnt;
675 # else
676 # define str (startp + 1)
677 # endif
678 const unsigned char *extra;
679 int32_t idx;
680 int32_t elem;
681 int32_t second;
682 int32_t hash;
683
684 # ifdef WIDE_CHAR_VERSION
685 /* We have to convert the name to a single-byte
686 string. This is possible since the names
687 consist of ASCII characters and the internal
688 representation is UCS4. */
689 for (strcnt = 0; strcnt < c1; ++strcnt)
690 str[strcnt] = startp[1 + strcnt];
691 # endif
692
693 table_size =
694 _NL_CURRENT_WORD (LC_COLLATE,
695 _NL_COLLATE_SYMB_HASH_SIZEMB);
696 symb_table = (const int32_t *)
697 _NL_CURRENT (LC_COLLATE,
698 _NL_COLLATE_SYMB_TABLEMB);
699 extra = (const unsigned char *)
700 _NL_CURRENT (LC_COLLATE,
701 _NL_COLLATE_SYMB_EXTRAMB);
702
703 /* Locate the character in the hashing
704 table. */
705 hash = elem_hash (str, c1);
706
707 idx = 0;
708 elem = hash % table_size;
709 second = hash % (table_size - 2);
710 while (symb_table[2 * elem] != 0)
711 {
712 /* First compare the hashing value. */
713 if (symb_table[2 * elem] == hash
714 && (c1
715 == extra[symb_table[2 * elem + 1]])
716 && memcmp (str,
717 &extra[symb_table[2 * elem + 1]
718 + 1], c1) == 0)
719 {
720 /* Yep, this is the entry. */
721 idx = symb_table[2 * elem + 1];
722 idx += 1 + extra[idx];
723 break;
724 }
725
726 /* Next entry. */
727 elem += second;
728 }
729
730 if (symb_table[2 * elem] != 0)
731 {
732 /* Compare the byte sequence but only if
733 this is not part of a range. */
734 # ifdef WIDE_CHAR_VERSION
735 int32_t *wextra;
736
737 idx += 1 + extra[idx];
738 /* Adjust for the alignment. */
739 idx = (idx + 3) & ~4;
740
741 wextra = (int32_t *) &extra[idx + 4];
742 # endif
743 /* Get the collation sequence value. */
744 is_seqval = true;
745 # ifdef WIDE_CHAR_VERSION
746 cend = wextra[1 + wextra[idx]];
747 # else
748 /* Adjust for the alignment. */
749 idx += 1 + extra[idx];
750 idx = (idx + 3) & ~4;
751 cend = *((int32_t *) &extra[idx]);
752 # endif
753 }
754 else if (symb_table[2 * elem] != 0 && c1 == 1)
755 {
756 cend = str[0];
757 c = *p++;
758 }
759 else
760 return FNM_NOMATCH;
761 }
762 # undef str
763 }
764 else
765 {
766 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
767 cend = *p++;
768 if (cend == L('\0'))
769 return FNM_NOMATCH;
770 cend = FOLD (cend);
771 }
772
773 /* XXX It is not entirely clear to me how to handle
774 characters which are not mentioned in the
775 collation specification. */
776 if (
777 # ifdef WIDE_CHAR_VERSION
778 lcollseq == 0xffffffff ||
779 # endif
780 lcollseq <= fcollseq)
781 {
782 /* We have to look at the upper bound. */
783 uint32_t hcollseq;
784
785 if (is_seqval)
786 hcollseq = cend;
787 else
788 {
789 # ifdef WIDE_CHAR_VERSION
790 hcollseq =
791 __collseq_table_lookup (collseq, cend);
792 if (hcollseq == ~((uint32_t) 0))
793 {
794 /* Hum, no information about the upper
795 bound. The matching succeeds if the
796 lower bound is matched exactly. */
797 if (lcollseq != fcollseq)
798 goto range_not_matched;
799
800 goto matched;
801 }
802 # else
803 hcollseq = collseq[cend];
804 # endif
805 }
806
807 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
808 goto matched;
809 }
810 # ifdef WIDE_CHAR_VERSION
811 range_not_matched:
812 # endif
813 #else
814 /* We use a boring value comparison of the character
815 values. This is better than comparing using
816 `strcoll' since the latter would have surprising
817 and sometimes fatal consequences. */
818 UCHAR cend = *p++;
819
820 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
821 cend = *p++;
822 if (cend == L('\0'))
823 return FNM_NOMATCH;
824
825 /* It is a range. */
826 if (cold <= fn && fn <= cend)
827 goto matched;
828 #endif
829
830 c = *p++;
831 }
832 }
833
834 if (c == L(']'))
835 break;
836 }
837
838 if (!not)
839 return FNM_NOMATCH;
840 break;
841
842 matched:
843 /* Skip the rest of the [...] that already matched. */
844 do
845 {
846 ignore_next:
847 c = *p++;
848
849 if (c == L('\0'))
850 /* [... (unterminated) loses. */
851 return FNM_NOMATCH;
852
853 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
854 {
855 if (*p == L('\0'))
856 return FNM_NOMATCH;
857 /* XXX 1003.2d11 is unclear if this is right. */
858 ++p;
859 }
860 else if (c == L('[') && *p == L(':'))
861 {
862 int c1 = 0;
863 const CHAR *startp = p;
864
865 while (1)
866 {
867 c = *++p;
868 if (++c1 == CHAR_CLASS_MAX_LENGTH)
869 return FNM_NOMATCH;
870
871 if (*p == L(':') && p[1] == L(']'))
872 break;
873
874 if (c < L('a') || c >= L('z'))
875 {
876 p = startp;
877 goto ignore_next;
878 }
879 }
880 p += 2;
881 c = *p++;
882 }
883 else if (c == L('[') && *p == L('='))
884 {
885 c = *++p;
886 if (c == L('\0'))
887 return FNM_NOMATCH;
888 c = *++p;
889 if (c != L('=') || p[1] != L(']'))
890 return FNM_NOMATCH;
891 p += 2;
892 c = *p++;
893 }
894 else if (c == L('[') && *p == L('.'))
895 {
896 ++p;
897 while (1)
898 {
899 c = *++p;
900 if (c == '\0')
901 return FNM_NOMATCH;
902
903 if (*p == L('.') && p[1] == L(']'))
904 break;
905 }
906 p += 2;
907 c = *p++;
908 }
909 }
910 while (c != L(']'));
911 if (not)
912 return FNM_NOMATCH;
913 }
914 break;
915
916 case L('+'):
917 case L('@'):
918 case L('!'):
919 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
920 {
921 int res;
922
923 res = EXT (c, p, n, string_end, no_leading_period, flags);
924 if (res != -1)
925 return res;
926 }
927 goto normal_match;
928
929 case L('/'):
930 if (NO_LEADING_PERIOD (flags))
931 {
932 if (n == string_end || c != (UCHAR) *n)
933 return FNM_NOMATCH;
934
935 new_no_leading_period = true;
936 break;
937 }
938 /* FALLTHROUGH */
939 default:
940 normal_match:
941 if (n == string_end || c != FOLD ((UCHAR) *n))
942 return FNM_NOMATCH;
943 }
944
945 no_leading_period = new_no_leading_period;
946 ++n;
947 }
948
949 if (n == string_end)
950 return 0;
951
952 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
953 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
954 return 0;
955
956 return FNM_NOMATCH;
957 }
958
959
960 static const CHAR *
961 internal_function
END(const CHAR * pattern)962 END (const CHAR *pattern)
963 {
964 const CHAR *p = pattern;
965
966 while (1)
967 if (*++p == L('\0'))
968 /* This is an invalid pattern. */
969 return pattern;
970 else if (*p == L('['))
971 {
972 /* Handle brackets special. */
973 if (posixly_correct == 0)
974 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
975
976 /* Skip the not sign. We have to recognize it because of a possibly
977 following ']'. */
978 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
979 ++p;
980 /* A leading ']' is recognized as such. */
981 if (*p == L(']'))
982 ++p;
983 /* Skip over all characters of the list. */
984 while (*p != L(']'))
985 if (*p++ == L('\0'))
986 /* This is no valid pattern. */
987 return pattern;
988 }
989 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
990 || *p == L('!')) && p[1] == L('('))
991 p = END (p + 1);
992 else if (*p == L(')'))
993 break;
994
995 return p + 1;
996 }
997
998
999 static int
1000 internal_function
EXT(INT opt,const CHAR * pattern,const CHAR * string,const CHAR * string_end,bool no_leading_period,int flags)1001 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1002 bool no_leading_period, int flags)
1003 {
1004 const CHAR *startp;
1005 size_t level;
1006 struct patternlist
1007 {
1008 struct patternlist *next;
1009 CHAR str[1];
1010 } *list = NULL;
1011 struct patternlist **lastp = &list;
1012 size_t pattern_len = STRLEN (pattern);
1013 const CHAR *p;
1014 const CHAR *rs;
1015 enum { ALLOCA_LIMIT = 8000 };
1016
1017 /* Parse the pattern. Store the individual parts in the list. */
1018 level = 0;
1019 for (startp = p = pattern + 1; ; ++p)
1020 if (*p == L('\0'))
1021 /* This is an invalid pattern. */
1022 return -1;
1023 else if (*p == L('['))
1024 {
1025 /* Handle brackets special. */
1026 if (posixly_correct == 0)
1027 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1028
1029 /* Skip the not sign. We have to recognize it because of a possibly
1030 following ']'. */
1031 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1032 ++p;
1033 /* A leading ']' is recognized as such. */
1034 if (*p == L(']'))
1035 ++p;
1036 /* Skip over all characters of the list. */
1037 while (*p != L(']'))
1038 if (*p++ == L('\0'))
1039 /* This is no valid pattern. */
1040 return -1;
1041 }
1042 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1043 || *p == L('!')) && p[1] == L('('))
1044 /* Remember the nesting level. */
1045 ++level;
1046 else if (*p == L(')'))
1047 {
1048 if (level-- == 0)
1049 {
1050 /* This means we found the end of the pattern. */
1051 #define NEW_PATTERN \
1052 struct patternlist *newp; \
1053 size_t plen; \
1054 size_t plensize; \
1055 size_t newpsize; \
1056 \
1057 plen = (opt == L('?') || opt == L('@') \
1058 ? pattern_len \
1059 : p - startp + 1); \
1060 plensize = plen * sizeof (CHAR); \
1061 newpsize = offsetof (struct patternlist, str) + plensize; \
1062 if ((size_t) -1 / sizeof (CHAR) < plen \
1063 || newpsize < offsetof (struct patternlist, str) \
1064 || ALLOCA_LIMIT <= newpsize) \
1065 return -1; \
1066 newp = (struct patternlist *) alloca (newpsize); \
1067 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1068 newp->next = NULL; \
1069 *lastp = newp; \
1070 lastp = &newp->next
1071 NEW_PATTERN;
1072 break;
1073 }
1074 }
1075 else if (*p == L('|'))
1076 {
1077 if (level == 0)
1078 {
1079 NEW_PATTERN;
1080 startp = p + 1;
1081 }
1082 }
1083 assert (list != NULL);
1084 assert (p[-1] == L(')'));
1085 #undef NEW_PATTERN
1086
1087 switch (opt)
1088 {
1089 case L('*'):
1090 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1091 return 0;
1092 /* FALLTHROUGH */
1093
1094 case L('+'):
1095 do
1096 {
1097 for (rs = string; rs <= string_end; ++rs)
1098 /* First match the prefix with the current pattern with the
1099 current pattern. */
1100 if (FCT (list->str, string, rs, no_leading_period,
1101 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1102 /* This was successful. Now match the rest with the rest
1103 of the pattern. */
1104 && (FCT (p, rs, string_end,
1105 rs == string
1106 ? no_leading_period
1107 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1108 flags & FNM_FILE_NAME
1109 ? flags : flags & ~FNM_PERIOD) == 0
1110 /* This didn't work. Try the whole pattern. */
1111 || (rs != string
1112 && FCT (pattern - 1, rs, string_end,
1113 rs == string
1114 ? no_leading_period
1115 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1116 flags & FNM_FILE_NAME
1117 ? flags : flags & ~FNM_PERIOD) == 0)))
1118 /* It worked. Signal success. */
1119 return 0;
1120 }
1121 while ((list = list->next) != NULL);
1122
1123 /* None of the patterns lead to a match. */
1124 return FNM_NOMATCH;
1125
1126 case L('?'):
1127 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1128 return 0;
1129 /* FALLTHROUGH */
1130
1131 case L('@'):
1132 do
1133 /* I cannot believe it but `strcat' is actually acceptable
1134 here. Match the entire string with the prefix from the
1135 pattern list and the rest of the pattern following the
1136 pattern list. */
1137 if (FCT (STRCAT (list->str, p), string, string_end,
1138 no_leading_period,
1139 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1140 /* It worked. Signal success. */
1141 return 0;
1142 while ((list = list->next) != NULL);
1143
1144 /* None of the patterns lead to a match. */
1145 return FNM_NOMATCH;
1146
1147 case L('!'):
1148 for (rs = string; rs <= string_end; ++rs)
1149 {
1150 struct patternlist *runp;
1151
1152 for (runp = list; runp != NULL; runp = runp->next)
1153 if (FCT (runp->str, string, rs, no_leading_period,
1154 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1155 break;
1156
1157 /* If none of the patterns matched see whether the rest does. */
1158 if (runp == NULL
1159 && (FCT (p, rs, string_end,
1160 rs == string
1161 ? no_leading_period
1162 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1163 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1164 == 0))
1165 /* This is successful. */
1166 return 0;
1167 }
1168
1169 /* None of the patterns together with the rest of the pattern
1170 lead to a match. */
1171 return FNM_NOMATCH;
1172
1173 default:
1174 assert (! "Invalid extended matching operator");
1175 break;
1176 }
1177
1178 return -1;
1179 }
1180
1181
/* Drop the parameter macros that the code above relies on but does not
   define itself.  Presumably the including file defines them before
   each inclusion and this lets it include the template again with a
   different set (e.g. narrow vs. wide characters) -- confirm against
   the includer.  */
#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT
#undef L
#undef BTOWC
1196