1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005 2 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 2, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software Foundation, 16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 17 #include <sys/cdefs.h> 18 __RCSID("$NetBSD: fnmatch_loop.c,v 1.2 2016/05/17 14:00:09 christos Exp $"); 19 20 21 /* Match STRING against the file name pattern PATTERN, returning zero if 22 it matches, nonzero if not. */ 23 static int EXT (INT opt, const CHAR *pattern, const CHAR *string, 24 const CHAR *string_end, bool no_leading_period, int flags) 25 internal_function; 26 static const CHAR *END (const CHAR *patternp) internal_function; 27 28 static int 29 internal_function 30 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, 31 bool no_leading_period, int flags) 32 { 33 register const CHAR *p = pattern, *n = string; 34 register UCHAR c; 35 #ifdef _LIBC 36 # if WIDE_CHAR_VERSION 37 const char *collseq = (const char *) 38 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); 39 # else 40 const UCHAR *collseq = (const UCHAR *) 41 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); 42 # endif 43 #endif 44 45 while ((c = *p++) != L('\0')) 46 { 47 bool new_no_leading_period = false; 48 c = FOLD (c); 49 50 switch (c) 51 { 52 case L('?'): 53 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') 54 { 55 int res; 56 57 res = EXT (c, p, n, string_end, no_leading_period, 58 flags); 59 if (res != -1) 60 return res; 61 } 62 63 if (n == string_end) 64 return FNM_NOMATCH; 65 else if (*n == L('/') && (flags & FNM_FILE_NAME)) 66 return FNM_NOMATCH; 67 else if (*n == L('.') && no_leading_period) 68 return FNM_NOMATCH; 69 break; 70 71 case L('\\'): 72 if (!(flags & FNM_NOESCAPE)) 73 { 74 c = *p++; 75 if (c == L('\0')) 76 /* Trailing \ loses. */ 77 return FNM_NOMATCH; 78 c = FOLD (c); 79 } 80 if (n == string_end || FOLD ((UCHAR) *n) != c) 81 return FNM_NOMATCH; 82 break; 83 84 case L('*'): 85 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') 86 { 87 int res; 88 89 res = EXT (c, p, n, string_end, no_leading_period, 90 flags); 91 if (res != -1) 92 return res; 93 } 94 95 if (n != string_end && *n == L('.') && no_leading_period) 96 return FNM_NOMATCH; 97 98 for (c = *p++; c == L('?') || c == L('*'); c = *p++) 99 { 100 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0) 101 { 102 const CHAR *endp = END (p); 103 if (endp != p) 104 { 105 /* This is a pattern. Skip over it. */ 106 p = endp; 107 continue; 108 } 109 } 110 111 if (c == L('?')) 112 { 113 /* A ? needs to match one character. */ 114 if (n == string_end) 115 /* There isn't another character; no match. */ 116 return FNM_NOMATCH; 117 else if (*n == L('/') 118 && __builtin_expect (flags & FNM_FILE_NAME, 0)) 119 /* A slash does not match a wildcard under 120 FNM_FILE_NAME. */ 121 return FNM_NOMATCH; 122 else 123 /* One character of the string is consumed in matching 124 this ? wildcard, so *??? won't match if there are 125 less than three characters. */ 126 ++n; 127 } 128 } 129 130 if (c == L('\0')) 131 /* The wildcard(s) is/are the last element of the pattern. 132 If the name is a file name and contains another slash 133 this means it cannot match, unless the FNM_LEADING_DIR 134 flag is set. */ 135 { 136 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; 137 138 if (flags & FNM_FILE_NAME) 139 { 140 if (flags & FNM_LEADING_DIR) 141 result = 0; 142 else 143 { 144 if (MEMCHR (n, L('/'), string_end - n) == NULL) 145 result = 0; 146 } 147 } 148 149 return result; 150 } 151 else 152 { 153 const CHAR *endp; 154 155 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'), 156 string_end - n); 157 if (endp == NULL) 158 endp = string_end; 159 160 if (c == L('[') 161 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0 162 && (c == L('@') || c == L('+') || c == L('!')) 163 && *p == L('('))) 164 { 165 int flags2 = ((flags & FNM_FILE_NAME) 166 ? flags : (flags & ~FNM_PERIOD)); 167 bool no_leading_period2 = no_leading_period; 168 169 for (--p; n < endp; ++n, no_leading_period2 = false) 170 if (FCT (p, n, string_end, no_leading_period2, flags2) 171 == 0) 172 return 0; 173 } 174 else if (c == L('/') && (flags & FNM_FILE_NAME)) 175 { 176 while (n < string_end && *n != L('/')) 177 ++n; 178 if (n < string_end && *n == L('/') 179 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags) 180 == 0)) 181 return 0; 182 } 183 else 184 { 185 int flags2 = ((flags & FNM_FILE_NAME) 186 ? flags : (flags & ~FNM_PERIOD)); 187 int no_leading_period2 = no_leading_period; 188 189 if (c == L('\\') && !(flags & FNM_NOESCAPE)) 190 c = *p; 191 c = FOLD (c); 192 for (--p; n < endp; ++n, no_leading_period2 = false) 193 if (FOLD ((UCHAR) *n) == c 194 && (FCT (p, n, string_end, no_leading_period2, flags2) 195 == 0)) 196 return 0; 197 } 198 } 199 200 /* If we come here no match is possible with the wildcard. */ 201 return FNM_NOMATCH; 202 203 case L('['): 204 { 205 /* Nonzero if the sense of the character class is inverted. */ 206 register bool not; 207 CHAR cold; 208 UCHAR fn; 209 210 if (posixly_correct == 0) 211 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; 212 213 if (n == string_end) 214 return FNM_NOMATCH; 215 216 if (*n == L('.') && no_leading_period) 217 return FNM_NOMATCH; 218 219 if (*n == L('/') && (flags & FNM_FILE_NAME)) 220 /* `/' cannot be matched. */ 221 return FNM_NOMATCH; 222 223 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^'))); 224 if (not) 225 ++p; 226 227 fn = FOLD ((UCHAR) *n); 228 229 c = *p++; 230 for (;;) 231 { 232 if (!(flags & FNM_NOESCAPE) && c == L('\\')) 233 { 234 if (*p == L('\0')) 235 return FNM_NOMATCH; 236 c = FOLD ((UCHAR) *p); 237 ++p; 238 239 if (c == fn) 240 goto matched; 241 } 242 else if (c == L('[') && *p == L(':')) 243 { 244 /* Leave room for the null. */ 245 CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; 246 size_t c1 = 0; 247 #if defined _LIBC || WIDE_CHAR_SUPPORT 248 wctype_t wt; 249 #endif 250 const CHAR *startp = p; 251 252 for (;;) 253 { 254 if (c1 == CHAR_CLASS_MAX_LENGTH) 255 /* The name is too long and therefore the pattern 256 is ill-formed. */ 257 return FNM_NOMATCH; 258 259 c = *++p; 260 if (c == L(':') && p[1] == L(']')) 261 { 262 p += 2; 263 break; 264 } 265 if (c < L('a') || c >= L('z')) 266 { 267 /* This cannot possibly be a character class name. 268 Match it as a normal range. */ 269 p = startp; 270 c = L('['); 271 goto normal_bracket; 272 } 273 str[c1++] = c; 274 } 275 str[c1] = L('\0'); 276 277 #if defined _LIBC || WIDE_CHAR_SUPPORT 278 wt = IS_CHAR_CLASS (str); 279 if (wt == 0) 280 /* Invalid character class name. */ 281 return FNM_NOMATCH; 282 283 # if defined _LIBC && ! WIDE_CHAR_VERSION 284 /* The following code is glibc specific but does 285 there a good job in speeding up the code since 286 we can avoid the btowc() call. */ 287 if (_ISCTYPE ((UCHAR) *n, wt)) 288 goto matched; 289 # else 290 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt)) 291 goto matched; 292 # endif 293 #else 294 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n)) 295 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n)) 296 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n)) 297 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n)) 298 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n)) 299 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n)) 300 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n)) 301 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n)) 302 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n)) 303 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n)) 304 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n)) 305 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n))) 306 goto matched; 307 #endif 308 c = *p++; 309 } 310 #ifdef _LIBC 311 else if (c == L('[') && *p == L('=')) 312 { 313 UCHAR str[1]; 314 uint32_t nrules = 315 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 316 const CHAR *startp = p; 317 318 c = *++p; 319 if (c == L('\0')) 320 { 321 p = startp; 322 c = L('['); 323 goto normal_bracket; 324 } 325 str[0] = c; 326 327 c = *++p; 328 if (c != L('=') || p[1] != L(']')) 329 { 330 p = startp; 331 c = L('['); 332 goto normal_bracket; 333 } 334 p += 2; 335 336 if (nrules == 0) 337 { 338 if ((UCHAR) *n == str[0]) 339 goto matched; 340 } 341 else 342 { 343 const int32_t *table; 344 # if WIDE_CHAR_VERSION 345 const int32_t *weights; 346 const int32_t *extra; 347 # else 348 const unsigned char *weights; 349 const unsigned char *extra; 350 # endif 351 const int32_t *indirect; 352 int32_t idx; 353 const UCHAR *cp = (const UCHAR *) str; 354 355 /* This #include defines a local function! */ 356 # if WIDE_CHAR_VERSION 357 # include <locale/weightwc.h> 358 # else 359 # include <locale/weight.h> 360 # endif 361 362 # if WIDE_CHAR_VERSION 363 table = (const int32_t *) 364 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); 365 weights = (const int32_t *) 366 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); 367 extra = (const int32_t *) 368 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); 369 indirect = (const int32_t *) 370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); 371 # else 372 table = (const int32_t *) 373 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); 374 weights = (const unsigned char *) 375 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); 376 extra = (const unsigned char *) 377 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); 378 indirect = (const int32_t *) 379 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); 380 # endif 381 382 idx = findidx (&cp); 383 if (idx != 0) 384 { 385 /* We found a table entry. Now see whether the 386 character we are currently at has the same 387 equivalance class value. */ 388 int len = weights[idx]; 389 int32_t idx2; 390 const UCHAR *np = (const UCHAR *) n; 391 392 idx2 = findidx (&np); 393 if (idx2 != 0 && len == weights[idx2]) 394 { 395 int cnt = 0; 396 397 while (cnt < len 398 && (weights[idx + 1 + cnt] 399 == weights[idx2 + 1 + cnt])) 400 ++cnt; 401 402 if (cnt == len) 403 goto matched; 404 } 405 } 406 } 407 408 c = *p++; 409 } 410 #endif 411 else if (c == L('\0')) 412 /* [ (unterminated) loses. */ 413 return FNM_NOMATCH; 414 else 415 { 416 bool is_range = false; 417 418 #ifdef _LIBC 419 bool is_seqval = false; 420 421 if (c == L('[') && *p == L('.')) 422 { 423 uint32_t nrules = 424 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 425 const CHAR *startp = p; 426 size_t c1 = 0; 427 428 while (1) 429 { 430 c = *++p; 431 if (c == L('.') && p[1] == L(']')) 432 { 433 p += 2; 434 break; 435 } 436 if (c == '\0') 437 return FNM_NOMATCH; 438 ++c1; 439 } 440 441 /* We have to handling the symbols differently in 442 ranges since then the collation sequence is 443 important. */ 444 is_range = *p == L('-') && p[1] != L('\0'); 445 446 if (nrules == 0) 447 { 448 /* There are no names defined in the collation 449 data. Therefore we only accept the trivial 450 names consisting of the character itself. */ 451 if (c1 != 1) 452 return FNM_NOMATCH; 453 454 if (!is_range && *n == startp[1]) 455 goto matched; 456 457 cold = startp[1]; 458 c = *p++; 459 } 460 else 461 { 462 int32_t table_size; 463 const int32_t *symb_table; 464 # ifdef WIDE_CHAR_VERSION 465 char str[c1]; 466 size_t strcnt; 467 # else 468 # define str (startp + 1) 469 # endif 470 const unsigned char *extra; 471 int32_t idx; 472 int32_t elem; 473 int32_t second; 474 int32_t hash; 475 476 # ifdef WIDE_CHAR_VERSION 477 /* We have to convert the name to a single-byte 478 string. This is possible since the names 479 consist of ASCII characters and the internal 480 representation is UCS4. */ 481 for (strcnt = 0; strcnt < c1; ++strcnt) 482 str[strcnt] = startp[1 + strcnt]; 483 # endif 484 485 table_size = 486 _NL_CURRENT_WORD (LC_COLLATE, 487 _NL_COLLATE_SYMB_HASH_SIZEMB); 488 symb_table = (const int32_t *) 489 _NL_CURRENT (LC_COLLATE, 490 _NL_COLLATE_SYMB_TABLEMB); 491 extra = (const unsigned char *) 492 _NL_CURRENT (LC_COLLATE, 493 _NL_COLLATE_SYMB_EXTRAMB); 494 495 /* Locate the character in the hashing table. */ 496 hash = elem_hash (str, c1); 497 498 idx = 0; 499 elem = hash % table_size; 500 second = hash % (table_size - 2); 501 while (symb_table[2 * elem] != 0) 502 { 503 /* First compare the hashing value. */ 504 if (symb_table[2 * elem] == hash 505 && c1 == extra[symb_table[2 * elem + 1]] 506 && memcmp (str, 507 &extra[symb_table[2 * elem + 1] 508 + 1], c1) == 0) 509 { 510 /* Yep, this is the entry. */ 511 idx = symb_table[2 * elem + 1]; 512 idx += 1 + extra[idx]; 513 break; 514 } 515 516 /* Next entry. */ 517 elem += second; 518 } 519 520 if (symb_table[2 * elem] != 0) 521 { 522 /* Compare the byte sequence but only if 523 this is not part of a range. */ 524 # ifdef WIDE_CHAR_VERSION 525 int32_t *wextra; 526 527 idx += 1 + extra[idx]; 528 /* Adjust for the alignment. */ 529 idx = (idx + 3) & ~3; 530 531 wextra = (int32_t *) &extra[idx + 4]; 532 # endif 533 534 if (! is_range) 535 { 536 # ifdef WIDE_CHAR_VERSION 537 for (c1 = 0; 538 (int32_t) c1 < wextra[idx]; 539 ++c1) 540 if (n[c1] != wextra[1 + c1]) 541 break; 542 543 if ((int32_t) c1 == wextra[idx]) 544 goto matched; 545 # else 546 for (c1 = 0; c1 < extra[idx]; ++c1) 547 if (n[c1] != extra[1 + c1]) 548 break; 549 550 if (c1 == extra[idx]) 551 goto matched; 552 # endif 553 } 554 555 /* Get the collation sequence value. */ 556 is_seqval = true; 557 # ifdef WIDE_CHAR_VERSION 558 cold = wextra[1 + wextra[idx]]; 559 # else 560 /* Adjust for the alignment. */ 561 idx += 1 + extra[idx]; 562 idx = (idx + 3) & ~4; 563 cold = *((int32_t *) &extra[idx]); 564 # endif 565 566 c = *p++; 567 } 568 else if (c1 == 1) 569 { 570 /* No valid character. Match it as a 571 single byte. */ 572 if (!is_range && *n == str[0]) 573 goto matched; 574 575 cold = str[0]; 576 c = *p++; 577 } 578 else 579 return FNM_NOMATCH; 580 } 581 } 582 else 583 # undef str 584 #endif 585 { 586 c = FOLD (c); 587 normal_bracket: 588 589 /* We have to handling the symbols differently in 590 ranges since then the collation sequence is 591 important. */ 592 is_range = (*p == L('-') && p[1] != L('\0') 593 && p[1] != L(']')); 594 595 if (!is_range && c == fn) 596 goto matched; 597 598 cold = c; 599 c = *p++; 600 } 601 602 if (c == L('-') && *p != L(']')) 603 { 604 #if _LIBC 605 /* We have to find the collation sequence 606 value for C. Collation sequence is nothing 607 we can regularly access. The sequence 608 value is defined by the order in which the 609 definitions of the collation values for the 610 various characters appear in the source 611 file. A strange concept, nowhere 612 documented. */ 613 uint32_t fcollseq; 614 uint32_t lcollseq; 615 UCHAR cend = *p++; 616 617 # ifdef WIDE_CHAR_VERSION 618 /* Search in the `names' array for the characters. */ 619 fcollseq = __collseq_table_lookup (collseq, fn); 620 if (fcollseq == ~((uint32_t) 0)) 621 /* XXX We don't know anything about the character 622 we are supposed to match. This means we are 623 failing. */ 624 goto range_not_matched; 625 626 if (is_seqval) 627 lcollseq = cold; 628 else 629 lcollseq = __collseq_table_lookup (collseq, cold); 630 # else 631 fcollseq = collseq[fn]; 632 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; 633 # endif 634 635 is_seqval = false; 636 if (cend == L('[') && *p == L('.')) 637 { 638 uint32_t nrules = 639 _NL_CURRENT_WORD (LC_COLLATE, 640 _NL_COLLATE_NRULES); 641 const CHAR *startp = p; 642 size_t c1 = 0; 643 644 while (1) 645 { 646 c = *++p; 647 if (c == L('.') && p[1] == L(']')) 648 { 649 p += 2; 650 break; 651 } 652 if (c == '\0') 653 return FNM_NOMATCH; 654 ++c1; 655 } 656 657 if (nrules == 0) 658 { 659 /* There are no names defined in the 660 collation data. Therefore we only 661 accept the trivial names consisting 662 of the character itself. */ 663 if (c1 != 1) 664 return FNM_NOMATCH; 665 666 cend = startp[1]; 667 } 668 else 669 { 670 int32_t table_size; 671 const int32_t *symb_table; 672 # ifdef WIDE_CHAR_VERSION 673 char str[c1]; 674 size_t strcnt; 675 # else 676 # define str (startp + 1) 677 # endif 678 const unsigned char *extra; 679 int32_t idx; 680 int32_t elem; 681 int32_t second; 682 int32_t hash; 683 684 # ifdef WIDE_CHAR_VERSION 685 /* We have to convert the name to a single-byte 686 string. This is possible since the names 687 consist of ASCII characters and the internal 688 representation is UCS4. */ 689 for (strcnt = 0; strcnt < c1; ++strcnt) 690 str[strcnt] = startp[1 + strcnt]; 691 # endif 692 693 table_size = 694 _NL_CURRENT_WORD (LC_COLLATE, 695 _NL_COLLATE_SYMB_HASH_SIZEMB); 696 symb_table = (const int32_t *) 697 _NL_CURRENT (LC_COLLATE, 698 _NL_COLLATE_SYMB_TABLEMB); 699 extra = (const unsigned char *) 700 _NL_CURRENT (LC_COLLATE, 701 _NL_COLLATE_SYMB_EXTRAMB); 702 703 /* Locate the character in the hashing 704 table. */ 705 hash = elem_hash (str, c1); 706 707 idx = 0; 708 elem = hash % table_size; 709 second = hash % (table_size - 2); 710 while (symb_table[2 * elem] != 0) 711 { 712 /* First compare the hashing value. */ 713 if (symb_table[2 * elem] == hash 714 && (c1 715 == extra[symb_table[2 * elem + 1]]) 716 && memcmp (str, 717 &extra[symb_table[2 * elem + 1] 718 + 1], c1) == 0) 719 { 720 /* Yep, this is the entry. */ 721 idx = symb_table[2 * elem + 1]; 722 idx += 1 + extra[idx]; 723 break; 724 } 725 726 /* Next entry. */ 727 elem += second; 728 } 729 730 if (symb_table[2 * elem] != 0) 731 { 732 /* Compare the byte sequence but only if 733 this is not part of a range. */ 734 # ifdef WIDE_CHAR_VERSION 735 int32_t *wextra; 736 737 idx += 1 + extra[idx]; 738 /* Adjust for the alignment. */ 739 idx = (idx + 3) & ~4; 740 741 wextra = (int32_t *) &extra[idx + 4]; 742 # endif 743 /* Get the collation sequence value. */ 744 is_seqval = true; 745 # ifdef WIDE_CHAR_VERSION 746 cend = wextra[1 + wextra[idx]]; 747 # else 748 /* Adjust for the alignment. */ 749 idx += 1 + extra[idx]; 750 idx = (idx + 3) & ~4; 751 cend = *((int32_t *) &extra[idx]); 752 # endif 753 } 754 else if (symb_table[2 * elem] != 0 && c1 == 1) 755 { 756 cend = str[0]; 757 c = *p++; 758 } 759 else 760 return FNM_NOMATCH; 761 } 762 # undef str 763 } 764 else 765 { 766 if (!(flags & FNM_NOESCAPE) && cend == L('\\')) 767 cend = *p++; 768 if (cend == L('\0')) 769 return FNM_NOMATCH; 770 cend = FOLD (cend); 771 } 772 773 /* XXX It is not entirely clear to me how to handle 774 characters which are not mentioned in the 775 collation specification. */ 776 if ( 777 # ifdef WIDE_CHAR_VERSION 778 lcollseq == 0xffffffff || 779 # endif 780 lcollseq <= fcollseq) 781 { 782 /* We have to look at the upper bound. */ 783 uint32_t hcollseq; 784 785 if (is_seqval) 786 hcollseq = cend; 787 else 788 { 789 # ifdef WIDE_CHAR_VERSION 790 hcollseq = 791 __collseq_table_lookup (collseq, cend); 792 if (hcollseq == ~((uint32_t) 0)) 793 { 794 /* Hum, no information about the upper 795 bound. The matching succeeds if the 796 lower bound is matched exactly. */ 797 if (lcollseq != fcollseq) 798 goto range_not_matched; 799 800 goto matched; 801 } 802 # else 803 hcollseq = collseq[cend]; 804 # endif 805 } 806 807 if (lcollseq <= hcollseq && fcollseq <= hcollseq) 808 goto matched; 809 } 810 # ifdef WIDE_CHAR_VERSION 811 range_not_matched: 812 # endif 813 #else 814 /* We use a boring value comparison of the character 815 values. This is better than comparing using 816 `strcoll' since the latter would have surprising 817 and sometimes fatal consequences. */ 818 UCHAR cend = *p++; 819 820 if (!(flags & FNM_NOESCAPE) && cend == L('\\')) 821 cend = *p++; 822 if (cend == L('\0')) 823 return FNM_NOMATCH; 824 825 /* It is a range. */ 826 if (cold <= fn && fn <= cend) 827 goto matched; 828 #endif 829 830 c = *p++; 831 } 832 } 833 834 if (c == L(']')) 835 break; 836 } 837 838 if (!not) 839 return FNM_NOMATCH; 840 break; 841 842 matched: 843 /* Skip the rest of the [...] that already matched. */ 844 do 845 { 846 ignore_next: 847 c = *p++; 848 849 if (c == L('\0')) 850 /* [... (unterminated) loses. */ 851 return FNM_NOMATCH; 852 853 if (!(flags & FNM_NOESCAPE) && c == L('\\')) 854 { 855 if (*p == L('\0')) 856 return FNM_NOMATCH; 857 /* XXX 1003.2d11 is unclear if this is right. */ 858 ++p; 859 } 860 else if (c == L('[') && *p == L(':')) 861 { 862 int c1 = 0; 863 const CHAR *startp = p; 864 865 while (1) 866 { 867 c = *++p; 868 if (++c1 == CHAR_CLASS_MAX_LENGTH) 869 return FNM_NOMATCH; 870 871 if (*p == L(':') && p[1] == L(']')) 872 break; 873 874 if (c < L('a') || c >= L('z')) 875 { 876 p = startp; 877 goto ignore_next; 878 } 879 } 880 p += 2; 881 c = *p++; 882 } 883 else if (c == L('[') && *p == L('=')) 884 { 885 c = *++p; 886 if (c == L('\0')) 887 return FNM_NOMATCH; 888 c = *++p; 889 if (c != L('=') || p[1] != L(']')) 890 return FNM_NOMATCH; 891 p += 2; 892 c = *p++; 893 } 894 else if (c == L('[') && *p == L('.')) 895 { 896 ++p; 897 while (1) 898 { 899 c = *++p; 900 if (c == '\0') 901 return FNM_NOMATCH; 902 903 if (*p == L('.') && p[1] == L(']')) 904 break; 905 } 906 p += 2; 907 c = *p++; 908 } 909 } 910 while (c != L(']')); 911 if (not) 912 return FNM_NOMATCH; 913 } 914 break; 915 916 case L('+'): 917 case L('@'): 918 case L('!'): 919 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') 920 { 921 int res; 922 923 res = EXT (c, p, n, string_end, no_leading_period, flags); 924 if (res != -1) 925 return res; 926 } 927 goto normal_match; 928 929 case L('/'): 930 if (NO_LEADING_PERIOD (flags)) 931 { 932 if (n == string_end || c != (UCHAR) *n) 933 return FNM_NOMATCH; 934 935 new_no_leading_period = true; 936 break; 937 } 938 /* FALLTHROUGH */ 939 default: 940 normal_match: 941 if (n == string_end || c != FOLD ((UCHAR) *n)) 942 return FNM_NOMATCH; 943 } 944 945 no_leading_period = new_no_leading_period; 946 ++n; 947 } 948 949 if (n == string_end) 950 return 0; 951 952 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/')) 953 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ 954 return 0; 955 956 return FNM_NOMATCH; 957 } 958 959 960 static const CHAR * 961 internal_function 962 END (const CHAR *pattern) 963 { 964 const CHAR *p = pattern; 965 966 while (1) 967 if (*++p == L('\0')) 968 /* This is an invalid pattern. */ 969 return pattern; 970 else if (*p == L('[')) 971 { 972 /* Handle brackets special. */ 973 if (posixly_correct == 0) 974 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; 975 976 /* Skip the not sign. We have to recognize it because of a possibly 977 following ']'. */ 978 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^'))) 979 ++p; 980 /* A leading ']' is recognized as such. */ 981 if (*p == L(']')) 982 ++p; 983 /* Skip over all characters of the list. */ 984 while (*p != L(']')) 985 if (*p++ == L('\0')) 986 /* This is no valid pattern. */ 987 return pattern; 988 } 989 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@') 990 || *p == L('!')) && p[1] == L('(')) 991 p = END (p + 1); 992 else if (*p == L(')')) 993 break; 994 995 return p + 1; 996 } 997 998 999 static int 1000 internal_function 1001 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, 1002 bool no_leading_period, int flags) 1003 { 1004 const CHAR *startp; 1005 size_t level; 1006 struct patternlist 1007 { 1008 struct patternlist *next; 1009 CHAR str[1]; 1010 } *list = NULL; 1011 struct patternlist **lastp = &list; 1012 size_t pattern_len = STRLEN (pattern); 1013 const CHAR *p; 1014 const CHAR *rs; 1015 enum { ALLOCA_LIMIT = 8000 }; 1016 1017 /* Parse the pattern. Store the individual parts in the list. */ 1018 level = 0; 1019 for (startp = p = pattern + 1; ; ++p) 1020 if (*p == L('\0')) 1021 /* This is an invalid pattern. */ 1022 return -1; 1023 else if (*p == L('[')) 1024 { 1025 /* Handle brackets special. */ 1026 if (posixly_correct == 0) 1027 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; 1028 1029 /* Skip the not sign. We have to recognize it because of a possibly 1030 following ']'. */ 1031 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^'))) 1032 ++p; 1033 /* A leading ']' is recognized as such. */ 1034 if (*p == L(']')) 1035 ++p; 1036 /* Skip over all characters of the list. */ 1037 while (*p != L(']')) 1038 if (*p++ == L('\0')) 1039 /* This is no valid pattern. */ 1040 return -1; 1041 } 1042 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@') 1043 || *p == L('!')) && p[1] == L('(')) 1044 /* Remember the nesting level. */ 1045 ++level; 1046 else if (*p == L(')')) 1047 { 1048 if (level-- == 0) 1049 { 1050 /* This means we found the end of the pattern. */ 1051 #define NEW_PATTERN \ 1052 struct patternlist *newp; \ 1053 size_t plen; \ 1054 size_t plensize; \ 1055 size_t newpsize; \ 1056 \ 1057 plen = (opt == L('?') || opt == L('@') \ 1058 ? pattern_len \ 1059 : p - startp + 1); \ 1060 plensize = plen * sizeof (CHAR); \ 1061 newpsize = offsetof (struct patternlist, str) + plensize; \ 1062 if ((size_t) -1 / sizeof (CHAR) < plen \ 1063 || newpsize < offsetof (struct patternlist, str) \ 1064 || ALLOCA_LIMIT <= newpsize) \ 1065 return -1; \ 1066 newp = (struct patternlist *) alloca (newpsize); \ 1067 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \ 1068 newp->next = NULL; \ 1069 *lastp = newp; \ 1070 lastp = &newp->next 1071 NEW_PATTERN; 1072 break; 1073 } 1074 } 1075 else if (*p == L('|')) 1076 { 1077 if (level == 0) 1078 { 1079 NEW_PATTERN; 1080 startp = p + 1; 1081 } 1082 } 1083 assert (list != NULL); 1084 assert (p[-1] == L(')')); 1085 #undef NEW_PATTERN 1086 1087 switch (opt) 1088 { 1089 case L('*'): 1090 if (FCT (p, string, string_end, no_leading_period, flags) == 0) 1091 return 0; 1092 /* FALLTHROUGH */ 1093 1094 case L('+'): 1095 do 1096 { 1097 for (rs = string; rs <= string_end; ++rs) 1098 /* First match the prefix with the current pattern with the 1099 current pattern. */ 1100 if (FCT (list->str, string, rs, no_leading_period, 1101 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0 1102 /* This was successful. Now match the rest with the rest 1103 of the pattern. */ 1104 && (FCT (p, rs, string_end, 1105 rs == string 1106 ? no_leading_period 1107 : rs[-1] == '/' && NO_LEADING_PERIOD (flags), 1108 flags & FNM_FILE_NAME 1109 ? flags : flags & ~FNM_PERIOD) == 0 1110 /* This didn't work. Try the whole pattern. */ 1111 || (rs != string 1112 && FCT (pattern - 1, rs, string_end, 1113 rs == string 1114 ? no_leading_period 1115 : rs[-1] == '/' && NO_LEADING_PERIOD (flags), 1116 flags & FNM_FILE_NAME 1117 ? flags : flags & ~FNM_PERIOD) == 0))) 1118 /* It worked. Signal success. */ 1119 return 0; 1120 } 1121 while ((list = list->next) != NULL); 1122 1123 /* None of the patterns lead to a match. */ 1124 return FNM_NOMATCH; 1125 1126 case L('?'): 1127 if (FCT (p, string, string_end, no_leading_period, flags) == 0) 1128 return 0; 1129 /* FALLTHROUGH */ 1130 1131 case L('@'): 1132 do 1133 /* I cannot believe it but `strcat' is actually acceptable 1134 here. Match the entire string with the prefix from the 1135 pattern list and the rest of the pattern following the 1136 pattern list. */ 1137 if (FCT (STRCAT (list->str, p), string, string_end, 1138 no_leading_period, 1139 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) 1140 /* It worked. Signal success. */ 1141 return 0; 1142 while ((list = list->next) != NULL); 1143 1144 /* None of the patterns lead to a match. */ 1145 return FNM_NOMATCH; 1146 1147 case L('!'): 1148 for (rs = string; rs <= string_end; ++rs) 1149 { 1150 struct patternlist *runp; 1151 1152 for (runp = list; runp != NULL; runp = runp->next) 1153 if (FCT (runp->str, string, rs, no_leading_period, 1154 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) 1155 break; 1156 1157 /* If none of the patterns matched see whether the rest does. */ 1158 if (runp == NULL 1159 && (FCT (p, rs, string_end, 1160 rs == string 1161 ? no_leading_period 1162 : rs[-1] == '/' && NO_LEADING_PERIOD (flags), 1163 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) 1164 == 0)) 1165 /* This is successful. */ 1166 return 0; 1167 } 1168 1169 /* None of the patterns together with the rest of the pattern 1170 lead to a match. */ 1171 return FNM_NOMATCH; 1172 1173 default: 1174 assert (! "Invalid extended matching operator"); 1175 break; 1176 } 1177 1178 return -1; 1179 } 1180 1181 1182 #undef FOLD 1183 #undef CHAR 1184 #undef UCHAR 1185 #undef INT 1186 #undef FCT 1187 #undef EXT 1188 #undef END 1189 #undef MEMPCPY 1190 #undef MEMCHR 1191 #undef STRCOLL 1192 #undef STRLEN 1193 #undef STRCAT 1194 #undef L 1195 #undef BTOWC 1196