1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #if defined(LIBC_SCCS) && !defined(lint) 34 static char sccsid[] = "@(#)glob.c 8.3 (Berkeley) 10/13/93"; 35 /* most changes between the version above and the one below have been ported: 36 static char sscsid[]= "$OpenBSD: glob.c,v 1.8.10.1 2001/04/10 jason Exp $"; 37 */ 38 #endif /* LIBC_SCCS and not lint */ 39 40 /* 41 * glob(3) -- a superset of the one defined in POSIX 1003.2. 42 * 43 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 44 * 45 * Optional extra services, controlled by flags not defined by POSIX: 46 * 47 * GLOB_QUOTE: 48 * Escaping convention: \ inhibits any special meaning the following 49 * character might have (except \ at end of string is retained). 50 * GLOB_MAGCHAR: 51 * Set in gl_flags if pattern contained a globbing character. 52 * GLOB_NOMAGIC: 53 * Same as GLOB_NOCHECK, but it will only append pattern if it did 54 * not contain any magic characters. [Used in csh style globbing] 55 * GLOB_ALTDIRFUNC: 56 * Use alternately specified directory access functions. 57 * GLOB_TILDE: 58 * expand ~user/foo to the /home/dir/of/user/foo 59 * GLOB_BRACE: 60 * expand {1,2}{a,b} to 1a 1b 2a 2b 61 * gl_matchc: 62 * Number of matches in the current invocation of glob. 63 * GLOB_ALPHASORT: 64 * sort alphabetically like csh (case doesn't matter) instead of in ASCII 65 * order 66 */ 67 68 #include <EXTERN.h> 69 #include <perl.h> 70 #include <XSUB.h> 71 72 #include "bsd_glob.h" 73 #ifdef I_PWD 74 # include <pwd.h> 75 #else 76 #if defined(HAS_PASSWD) && !defined(VMS) 77 struct passwd *getpwnam(char *); 78 struct passwd *getpwuid(Uid_t); 79 #endif 80 #endif 81 82 #ifndef MAXPATHLEN 83 # ifdef PATH_MAX 84 # define MAXPATHLEN PATH_MAX 85 # else 86 # define MAXPATHLEN 1024 87 # endif 88 #endif 89 90 #include <limits.h> 91 92 #ifndef ARG_MAX 93 # ifdef _SC_ARG_MAX 94 # define ARG_MAX (sysconf(_SC_ARG_MAX)) 95 # else 96 # ifdef _POSIX_ARG_MAX 97 # define ARG_MAX _POSIX_ARG_MAX 98 # else 99 # ifdef WIN32 100 # define ARG_MAX 14500 /* from VC's limits.h */ 101 # else 102 # define ARG_MAX 4096 /* from POSIX, be conservative */ 103 # endif 104 # endif 105 # endif 106 #endif 107 108 #define BG_DOLLAR '$' 109 #define BG_DOT '.' 110 #define BG_EOS '\0' 111 #define BG_LBRACKET '[' 112 #define BG_NOT '!' 113 #define BG_QUESTION '?' 114 #define BG_QUOTE '\\' 115 #define BG_RANGE '-' 116 #define BG_RBRACKET ']' 117 #define BG_SEP '/' 118 #ifdef DOSISH 119 #define BG_SEP2 '\\' 120 #endif 121 #define BG_STAR '*' 122 #define BG_TILDE '~' 123 #define BG_UNDERSCORE '_' 124 #define BG_LBRACE '{' 125 #define BG_RBRACE '}' 126 #define BG_SLASH '/' 127 #define BG_COMMA ',' 128 129 #ifndef GLOB_DEBUG 130 131 #define M_QUOTE 0x8000 132 #define M_PROTECT 0x4000 133 #define M_MASK 0xffff 134 #define M_ASCII 0x00ff 135 136 typedef U16 Char; 137 138 #else 139 140 #define M_QUOTE 0x80 141 #define M_PROTECT 0x40 142 #define M_MASK 0xff 143 #define M_ASCII 0x7f 144 145 typedef U8 Char; 146 147 #endif /* !GLOB_DEBUG */ 148 149 150 #define CHAR(c) ((Char)((c)&M_ASCII)) 151 #define META(c) ((Char)((c)|M_QUOTE)) 152 #define M_ALL META('*') 153 #define M_END META(']') 154 #define M_NOT META('!') 155 #define M_ONE META('?') 156 #define M_RNG META('-') 157 #define M_SET META('[') 158 #define ismeta(c) (((c)&M_QUOTE) != 0) 159 160 161 static int compare(const void *, const void *); 162 static int ci_compare(const void *, const void *); 163 static int g_Ctoc(const Char *, char *, STRLEN); 164 static int g_lstat(Char *, Stat_t *, glob_t *); 165 static DIR *g_opendir(Char *, glob_t *); 166 static Char *g_strchr(Char *, int); 167 static int g_stat(Char *, Stat_t *, glob_t *); 168 static int glob0(const Char *, glob_t *); 169 static int glob1(Char *, Char *, glob_t *, size_t *); 170 static int glob2(Char *, Char *, Char *, Char *, Char *, Char *, 171 glob_t *, size_t *); 172 static int glob3(Char *, Char *, Char *, Char *, Char *, 173 Char *, Char *, glob_t *, size_t *); 174 static int globextend(const Char *, glob_t *, size_t *); 175 static const Char * 176 globtilde(const Char *, Char *, size_t, glob_t *); 177 static int globexp1(const Char *, glob_t *); 178 static int globexp2(const Char *, const Char *, glob_t *, int *); 179 static int match(Char *, Char *, Char *, int); 180 #ifdef GLOB_DEBUG 181 static void qprintf(const char *, Char *); 182 #endif /* GLOB_DEBUG */ 183 184 #ifdef PERL_IMPLICIT_CONTEXT 185 static Direntry_t * my_readdir(DIR*); 186 187 static Direntry_t * 188 my_readdir(DIR *d) 189 { 190 #ifndef NETWARE 191 return PerlDir_read(d); 192 #else 193 return (DIR *)PerlDir_read(d); 194 #endif 195 } 196 #else 197 198 /* ReliantUNIX (OS formerly known as SINIX) defines readdir 199 * in LFS-mode to be a 64-bit version of readdir. */ 200 201 # ifdef sinix 202 static Direntry_t * my_readdir(DIR*); 203 204 static Direntry_t * 205 my_readdir(DIR *d) 206 { 207 return readdir(d); 208 } 209 # else 210 211 # define my_readdir readdir 212 213 # endif 214 215 #endif 216 217 int 218 bsd_glob(const char *pattern, int flags, 219 int (*errfunc)(const char *, int), glob_t *pglob) 220 { 221 const U8 *patnext; 222 int c; 223 Char *bufnext, *bufend, patbuf[MAXPATHLEN]; 224 patnext = (U8 *) pattern; 225 /* TODO: GLOB_APPEND / GLOB_DOOFFS aren't supported yet */ 226 #if 0 227 if (!(flags & GLOB_APPEND)) { 228 pglob->gl_pathc = 0; 229 pglob->gl_pathv = NULL; 230 if (!(flags & GLOB_DOOFFS)) 231 pglob->gl_offs = 0; 232 } 233 #else 234 pglob->gl_pathc = 0; 235 pglob->gl_pathv = NULL; 236 pglob->gl_offs = 0; 237 #endif 238 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 239 pglob->gl_errfunc = errfunc; 240 pglob->gl_matchc = 0; 241 242 bufnext = patbuf; 243 bufend = bufnext + MAXPATHLEN - 1; 244 #ifdef DOSISH 245 /* Nasty hack to treat patterns like "C:*" correctly. In this 246 * case, the * should match any file in the current directory 247 * on the C: drive. However, the glob code does not treat the 248 * colon specially, so it looks for files beginning "C:" in 249 * the current directory. To fix this, change the pattern to 250 * add an explicit "./" at the start (just after the drive 251 * letter and colon - ie change to "C:./"). 252 */ 253 if (isalpha(pattern[0]) && pattern[1] == ':' && 254 pattern[2] != BG_SEP && pattern[2] != BG_SEP2 && 255 bufend - bufnext > 4) { 256 *bufnext++ = pattern[0]; 257 *bufnext++ = ':'; 258 *bufnext++ = '.'; 259 *bufnext++ = BG_SEP; 260 patnext += 2; 261 } 262 #endif 263 264 if (flags & GLOB_QUOTE) { 265 /* Protect the quoted characters. */ 266 while (bufnext < bufend && (c = *patnext++) != BG_EOS) 267 if (c == BG_QUOTE) { 268 #ifdef DOSISH 269 /* To avoid backslashitis on Win32, 270 * we only treat \ as a quoting character 271 * if it precedes one of the 272 * metacharacters []-{}~\ 273 */ 274 if ((c = *patnext++) != '[' && c != ']' && 275 c != '-' && c != '{' && c != '}' && 276 c != '~' && c != '\\') { 277 #else 278 if ((c = *patnext++) == BG_EOS) { 279 #endif 280 c = BG_QUOTE; 281 --patnext; 282 } 283 *bufnext++ = c | M_PROTECT; 284 } else 285 *bufnext++ = c; 286 } else 287 while (bufnext < bufend && (c = *patnext++) != BG_EOS) 288 *bufnext++ = c; 289 *bufnext = BG_EOS; 290 291 if (flags & GLOB_BRACE) 292 return globexp1(patbuf, pglob); 293 else 294 return glob0(patbuf, pglob); 295 } 296 297 /* 298 * Expand recursively a glob {} pattern. When there is no more expansion 299 * invoke the standard globbing routine to glob the rest of the magic 300 * characters 301 */ 302 static int 303 globexp1(const Char *pattern, glob_t *pglob) 304 { 305 const Char* ptr = pattern; 306 int rv; 307 308 /* Protect a single {}, for find(1), like csh */ 309 if (pattern[0] == BG_LBRACE && pattern[1] == BG_RBRACE && pattern[2] == BG_EOS) 310 return glob0(pattern, pglob); 311 312 while ((ptr = (const Char *) g_strchr((Char *) ptr, BG_LBRACE)) != NULL) 313 if (!globexp2(ptr, pattern, pglob, &rv)) 314 return rv; 315 316 return glob0(pattern, pglob); 317 } 318 319 320 /* 321 * Recursive brace globbing helper. Tries to expand a single brace. 322 * If it succeeds then it invokes globexp1 with the new pattern. 323 * If it fails then it tries to glob the rest of the pattern and returns. 324 */ 325 static int 326 globexp2(const Char *ptr, const Char *pattern, 327 glob_t *pglob, int *rv) 328 { 329 int i; 330 Char *lm, *ls; 331 const Char *pe, *pm, *pm1, *pl; 332 Char patbuf[MAXPATHLEN]; 333 334 /* copy part up to the brace */ 335 for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++) 336 ; 337 *lm = BG_EOS; 338 ls = lm; 339 340 /* Find the balanced brace */ 341 for (i = 0, pe = ++ptr; *pe; pe++) 342 if (*pe == BG_LBRACKET) { 343 /* Ignore everything between [] */ 344 for (pm = pe++; *pe != BG_RBRACKET && *pe != BG_EOS; pe++) 345 ; 346 if (*pe == BG_EOS) { 347 /* 348 * We could not find a matching BG_RBRACKET. 349 * Ignore and just look for BG_RBRACE 350 */ 351 pe = pm; 352 } 353 } else if (*pe == BG_LBRACE) 354 i++; 355 else if (*pe == BG_RBRACE) { 356 if (i == 0) 357 break; 358 i--; 359 } 360 361 /* Non matching braces; just glob the pattern */ 362 if (i != 0 || *pe == BG_EOS) { 363 *rv = glob0(patbuf, pglob); 364 return 0; 365 } 366 367 for (i = 0, pl = pm = ptr; pm <= pe; pm++) { 368 switch (*pm) { 369 case BG_LBRACKET: 370 /* Ignore everything between [] */ 371 for (pm1 = pm++; *pm != BG_RBRACKET && *pm != BG_EOS; pm++) 372 ; 373 if (*pm == BG_EOS) { 374 /* 375 * We could not find a matching BG_RBRACKET. 376 * Ignore and just look for BG_RBRACE 377 */ 378 pm = pm1; 379 } 380 break; 381 382 case BG_LBRACE: 383 i++; 384 break; 385 386 case BG_RBRACE: 387 if (i) { 388 i--; 389 break; 390 } 391 /* FALLTHROUGH */ 392 case BG_COMMA: 393 if (i && *pm == BG_COMMA) 394 break; 395 else { 396 /* Append the current string */ 397 for (lm = ls; (pl < pm); *lm++ = *pl++) 398 ; 399 400 /* 401 * Append the rest of the pattern after the 402 * closing brace 403 */ 404 for (pl = pe + 1; (*lm++ = *pl++) != BG_EOS; ) 405 ; 406 407 /* Expand the current pattern */ 408 #ifdef GLOB_DEBUG 409 qprintf("globexp2:", patbuf); 410 #endif /* GLOB_DEBUG */ 411 *rv = globexp1(patbuf, pglob); 412 413 /* move after the comma, to the next string */ 414 pl = pm + 1; 415 } 416 break; 417 418 default: 419 break; 420 } 421 } 422 *rv = 0; 423 return 0; 424 } 425 426 427 428 /* 429 * expand tilde from the passwd file. 430 */ 431 static const Char * 432 globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob) 433 { 434 char *h; 435 const Char *p; 436 Char *b, *eb; 437 438 if (*pattern != BG_TILDE || !(pglob->gl_flags & GLOB_TILDE)) 439 return pattern; 440 441 /* Copy up to the end of the string or / */ 442 eb = &patbuf[patbuf_len - 1]; 443 for (p = pattern + 1, h = (char *) patbuf; 444 h < (char*)eb && *p && *p != BG_SLASH; *h++ = (char)*p++) 445 ; 446 447 *h = BG_EOS; 448 449 #if 0 450 if (h == (char *)eb) 451 return what; 452 #endif 453 454 if (((char *) patbuf)[0] == BG_EOS) { 455 /* 456 * handle a plain ~ or ~/ by expanding $HOME 457 * first and then trying the password file 458 * or $USERPROFILE on DOSISH systems 459 */ 460 if ((h = getenv("HOME")) == NULL) { 461 #ifdef HAS_PASSWD 462 struct passwd *pwd; 463 if ((pwd = getpwuid(getuid())) == NULL) 464 return pattern; 465 else 466 h = pwd->pw_dir; 467 #elif DOSISH 468 /* 469 * When no passwd file, fallback to the USERPROFILE 470 * environment variable on DOSish systems. 471 */ 472 if ((h = getenv("USERPROFILE")) == NULL) { 473 return pattern; 474 } 475 #else 476 return pattern; 477 #endif 478 } 479 } else { 480 /* 481 * Expand a ~user 482 */ 483 #ifdef HAS_PASSWD 484 struct passwd *pwd; 485 if ((pwd = getpwnam((char*) patbuf)) == NULL) 486 return pattern; 487 else 488 h = pwd->pw_dir; 489 #else 490 return pattern; 491 #endif 492 } 493 494 /* Copy the home directory */ 495 for (b = patbuf; b < eb && *h; *b++ = *h++) 496 ; 497 498 /* Append the rest of the pattern */ 499 while (b < eb && (*b++ = *p++) != BG_EOS) 500 ; 501 *b = BG_EOS; 502 503 return patbuf; 504 } 505 506 507 /* 508 * The main glob() routine: compiles the pattern (optionally processing 509 * quotes), calls glob1() to do the real pattern matching, and finally 510 * sorts the list (unless unsorted operation is requested). Returns 0 511 * if things went well, nonzero if errors occurred. It is not an error 512 * to find no matches. 513 */ 514 static int 515 glob0(const Char *pattern, glob_t *pglob) 516 { 517 const Char *qpat, *qpatnext; 518 int c, err, oldflags, oldpathc; 519 Char *bufnext, patbuf[MAXPATHLEN]; 520 size_t limit = 0; 521 522 qpat = globtilde(pattern, patbuf, MAXPATHLEN, pglob); 523 qpatnext = qpat; 524 oldflags = pglob->gl_flags; 525 oldpathc = pglob->gl_pathc; 526 bufnext = patbuf; 527 528 /* We don't need to check for buffer overflow any more. */ 529 while ((c = *qpatnext++) != BG_EOS) { 530 switch (c) { 531 case BG_LBRACKET: 532 c = *qpatnext; 533 if (c == BG_NOT) 534 ++qpatnext; 535 if (*qpatnext == BG_EOS || 536 g_strchr((Char *) qpatnext+1, BG_RBRACKET) == NULL) { 537 *bufnext++ = BG_LBRACKET; 538 if (c == BG_NOT) 539 --qpatnext; 540 break; 541 } 542 *bufnext++ = M_SET; 543 if (c == BG_NOT) 544 *bufnext++ = M_NOT; 545 c = *qpatnext++; 546 do { 547 *bufnext++ = CHAR(c); 548 if (*qpatnext == BG_RANGE && 549 (c = qpatnext[1]) != BG_RBRACKET) { 550 *bufnext++ = M_RNG; 551 *bufnext++ = CHAR(c); 552 qpatnext += 2; 553 } 554 } while ((c = *qpatnext++) != BG_RBRACKET); 555 pglob->gl_flags |= GLOB_MAGCHAR; 556 *bufnext++ = M_END; 557 break; 558 case BG_QUESTION: 559 pglob->gl_flags |= GLOB_MAGCHAR; 560 *bufnext++ = M_ONE; 561 break; 562 case BG_STAR: 563 pglob->gl_flags |= GLOB_MAGCHAR; 564 /* Collapse adjacent stars to one. 565 * This is required to ensure that a pattern like 566 * "a**" matches a name like "a", as without this 567 * check when the first star matched everything it would 568 * cause the second star to return a match fail. 569 * As long ** is folded here this does not happen. 570 */ 571 if (bufnext == patbuf || bufnext[-1] != M_ALL) 572 *bufnext++ = M_ALL; 573 break; 574 default: 575 *bufnext++ = CHAR(c); 576 break; 577 } 578 } 579 *bufnext = BG_EOS; 580 #ifdef GLOB_DEBUG 581 qprintf("glob0:", patbuf); 582 #endif /* GLOB_DEBUG */ 583 584 if ((err = glob1(patbuf, patbuf+MAXPATHLEN-1, pglob, &limit)) != 0) { 585 pglob->gl_flags = oldflags; 586 return(err); 587 } 588 589 /* 590 * If there was no match we are going to append the pattern 591 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified 592 * and the pattern did not contain any magic characters 593 * GLOB_NOMAGIC is there just for compatibility with csh. 594 */ 595 if (pglob->gl_pathc == oldpathc && 596 ((pglob->gl_flags & GLOB_NOCHECK) || 597 ((pglob->gl_flags & GLOB_NOMAGIC) && 598 !(pglob->gl_flags & GLOB_MAGCHAR)))) 599 { 600 #ifdef GLOB_DEBUG 601 printf("calling globextend from glob0\n"); 602 #endif /* GLOB_DEBUG */ 603 pglob->gl_flags = oldflags; 604 return(globextend(qpat, pglob, &limit)); 605 } 606 else if (!(pglob->gl_flags & GLOB_NOSORT)) 607 if (pglob->gl_pathv) 608 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, 609 pglob->gl_pathc - oldpathc, sizeof(char *), 610 (pglob->gl_flags & (GLOB_ALPHASORT|GLOB_NOCASE)) 611 ? ci_compare : compare); 612 pglob->gl_flags = oldflags; 613 return(0); 614 } 615 616 static int 617 ci_compare(const void *p, const void *q) 618 { 619 const char *pp = *(const char **)p; 620 const char *qq = *(const char **)q; 621 int ci; 622 while (*pp && *qq) { 623 if (toFOLD(*pp) != toFOLD(*qq)) 624 break; 625 ++pp; 626 ++qq; 627 } 628 ci = toFOLD(*pp) - toFOLD(*qq); 629 if (ci == 0) 630 return compare(p, q); 631 return ci; 632 } 633 634 static int 635 compare(const void *p, const void *q) 636 { 637 return(strcmp(*(char **)p, *(char **)q)); 638 } 639 640 static int 641 glob1(Char *pattern, Char *pattern_last, glob_t *pglob, size_t *limitp) 642 { 643 Char pathbuf[MAXPATHLEN]; 644 645 assert(pattern < pattern_last); 646 647 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ 648 if (*pattern == BG_EOS) 649 return(0); 650 return(glob2(pathbuf, pathbuf+MAXPATHLEN-1, 651 pathbuf, pathbuf+MAXPATHLEN-1, 652 pattern, pattern_last, pglob, limitp)); 653 } 654 655 /* 656 * The functions glob2 and glob3 are mutually recursive; there is one level 657 * of recursion for each segment in the pattern that contains one or more 658 * meta characters. 659 */ 660 static int 661 glob2(Char *pathbuf, Char *pathbuf_last, Char *pathend, Char *pathend_last, 662 Char *pattern, Char *pattern_last, glob_t *pglob, size_t *limitp) 663 { 664 Stat_t sb; 665 Char *p, *q; 666 int anymeta; 667 668 assert(pattern < pattern_last); 669 670 /* 671 * Loop over pattern segments until end of pattern or until 672 * segment with meta character found. 673 */ 674 for (anymeta = 0;;) { 675 if (*pattern == BG_EOS) { /* End of pattern? */ 676 *pathend = BG_EOS; 677 if (g_lstat(pathbuf, &sb, pglob)) 678 return(0); 679 680 if (((pglob->gl_flags & GLOB_MARK) && 681 pathend[-1] != BG_SEP 682 #ifdef DOSISH 683 && pathend[-1] != BG_SEP2 684 #endif 685 ) && (S_ISDIR(sb.st_mode) || 686 (S_ISLNK(sb.st_mode) && 687 (g_stat(pathbuf, &sb, pglob) == 0) && 688 S_ISDIR(sb.st_mode)))) { 689 if (pathend+1 > pathend_last) 690 return (1); 691 *pathend++ = BG_SEP; 692 *pathend = BG_EOS; 693 } 694 ++pglob->gl_matchc; 695 #ifdef GLOB_DEBUG 696 printf("calling globextend from glob2\n"); 697 #endif /* GLOB_DEBUG */ 698 return(globextend(pathbuf, pglob, limitp)); 699 } 700 701 /* Find end of next segment, copy tentatively to pathend. */ 702 q = pathend; 703 p = pattern; 704 while (*p != BG_EOS && *p != BG_SEP 705 #ifdef DOSISH 706 && *p != BG_SEP2 707 #endif 708 ) { 709 assert(p < pattern_last); 710 if (ismeta(*p)) 711 anymeta = 1; 712 if (q+1 > pathend_last) 713 return (1); 714 *q++ = *p++; 715 } 716 717 if (!anymeta) { /* No expansion, do next segment. */ 718 pathend = q; 719 pattern = p; 720 while (*pattern == BG_SEP 721 #ifdef DOSISH 722 || *pattern == BG_SEP2 723 #endif 724 ) { 725 assert(p < pattern_last); 726 if (pathend+1 > pathend_last) 727 return (1); 728 *pathend++ = *pattern++; 729 } 730 } else 731 /* Need expansion, recurse. */ 732 return(glob3(pathbuf, pathbuf_last, pathend, 733 pathend_last, pattern, 734 p, pattern_last, pglob, limitp)); 735 } 736 /* NOTREACHED */ 737 } 738 739 static int 740 glob3(Char *pathbuf, Char *pathbuf_last, Char *pathend, Char *pathend_last, 741 Char *pattern, 742 Char *restpattern, Char *restpattern_last, glob_t *pglob, size_t *limitp) 743 { 744 Direntry_t *dp; 745 DIR *dirp; 746 int err; 747 int nocase; 748 char buf[MAXPATHLEN]; 749 750 /* 751 * The readdirfunc declaration can't be prototyped, because it is 752 * assigned, below, to two functions which are prototyped in glob.h 753 * and dirent.h as taking pointers to differently typed opaque 754 * structures. 755 */ 756 Direntry_t *(*readdirfunc)(DIR*); 757 758 assert(pattern < restpattern_last); 759 assert(restpattern < restpattern_last); 760 761 if (pathend > pathend_last) 762 return (1); 763 *pathend = BG_EOS; 764 errno = 0; 765 766 #ifdef VMS 767 { 768 Char *q = pathend; 769 if (q - pathbuf > 5) { 770 q -= 5; 771 if (q[0] == '.' && 772 tolower(q[1]) == 'd' && tolower(q[2]) == 'i' && 773 tolower(q[3]) == 'r' && q[4] == '/') 774 { 775 q[0] = '/'; 776 q[1] = BG_EOS; 777 pathend = q+1; 778 } 779 } 780 } 781 #endif 782 783 if ((dirp = g_opendir(pathbuf, pglob)) == NULL) { 784 /* TODO: don't call for ENOENT or ENOTDIR? */ 785 if (pglob->gl_errfunc) { 786 if (g_Ctoc(pathbuf, buf, sizeof(buf))) 787 return (GLOB_ABEND); 788 if (pglob->gl_errfunc(buf, errno) || 789 (pglob->gl_flags & GLOB_ERR)) 790 return (GLOB_ABEND); 791 } 792 return(0); 793 } 794 795 err = 0; 796 nocase = ((pglob->gl_flags & GLOB_NOCASE) != 0); 797 798 /* Search directory for matching names. */ 799 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 800 readdirfunc = (Direntry_t *(*)(DIR *))pglob->gl_readdir; 801 else 802 readdirfunc = (Direntry_t *(*)(DIR *))my_readdir; 803 while ((dp = (*readdirfunc)(dirp))) { 804 U8 *sc; 805 Char *dc; 806 807 /* Initial BG_DOT must be matched literally. */ 808 if (dp->d_name[0] == BG_DOT && *pattern != BG_DOT) 809 continue; 810 dc = pathend; 811 sc = (U8 *) dp->d_name; 812 while (dc < pathend_last && (*dc++ = *sc++) != BG_EOS) 813 ; 814 if (dc >= pathend_last) { 815 *dc = BG_EOS; 816 err = 1; 817 break; 818 } 819 820 if (!match(pathend, pattern, restpattern, nocase)) { 821 *pathend = BG_EOS; 822 continue; 823 } 824 err = glob2(pathbuf, pathbuf_last, --dc, pathend_last, 825 restpattern, restpattern_last, pglob, limitp); 826 if (err) 827 break; 828 } 829 830 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 831 (*pglob->gl_closedir)(dirp); 832 else 833 PerlDir_close(dirp); 834 return(err); 835 } 836 837 838 /* 839 * Extend the gl_pathv member of a glob_t structure to accommodate a new item, 840 * add the new item, and update gl_pathc. 841 * 842 * This assumes the BSD realloc, which only copies the block when its size 843 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 844 * behavior. 845 * 846 * Return 0 if new item added, error code if memory couldn't be allocated. 847 * 848 * Invariant of the glob_t structure: 849 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 850 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 851 */ 852 static int 853 globextend(const Char *path, glob_t *pglob, size_t *limitp) 854 { 855 char **pathv; 856 int i; 857 STRLEN newsize, len; 858 char *copy; 859 const Char *p; 860 861 #ifdef GLOB_DEBUG 862 printf("Adding "); 863 for (p = path; *p; p++) 864 (void)printf("%c", CHAR(*p)); 865 printf("\n"); 866 #endif /* GLOB_DEBUG */ 867 868 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 869 if (pglob->gl_pathv) 870 pathv = Renew(pglob->gl_pathv,newsize,char*); 871 else 872 Newx(pathv,newsize,char*); 873 if (pathv == NULL) { 874 if (pglob->gl_pathv) { 875 Safefree(pglob->gl_pathv); 876 pglob->gl_pathv = NULL; 877 } 878 return(GLOB_NOSPACE); 879 } 880 881 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 882 /* first time around -- clear initial gl_offs items */ 883 pathv += pglob->gl_offs; 884 for (i = pglob->gl_offs; --i >= 0; ) 885 *--pathv = NULL; 886 } 887 pglob->gl_pathv = pathv; 888 889 for (p = path; *p++;) 890 ; 891 len = (STRLEN)(p - path); 892 *limitp += len; 893 Newx(copy, p-path, char); 894 if (copy != NULL) { 895 if (g_Ctoc(path, copy, len)) { 896 Safefree(copy); 897 return(GLOB_NOSPACE); 898 } 899 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 900 } 901 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 902 903 if ((pglob->gl_flags & GLOB_LIMIT) && 904 newsize + *limitp >= (unsigned long)ARG_MAX) { 905 errno = 0; 906 return(GLOB_NOSPACE); 907 } 908 909 return(copy == NULL ? GLOB_NOSPACE : 0); 910 } 911 912 913 /* 914 * pattern matching function for filenames using state machine to avoid 915 * recursion. We maintain a "nextp" and "nextn" to allow us to backtrack 916 * without additional callframes, and to do cleanly prune the backtracking 917 * state when multiple '*' (start) matches are included in the pattern. 918 * 919 * Thanks to Russ Cox for the improved state machine logic to avoid quadratic 920 * matching on failure. 921 * 922 * https://research.swtch.com/glob 923 * 924 * An example would be a pattern 925 * ("a*" x 100) . "y" 926 * against a file name like 927 * ("a" x 100) . "x" 928 * 929 */ 930 static int 931 match(Char *name, Char *pat, Char *patend, int nocase) 932 { 933 int ok, negate_range; 934 Char c, k; 935 Char *nextp = NULL; 936 Char *nextn = NULL; 937 938 redo: 939 while (pat < patend) { 940 c = *pat++; 941 switch (c & M_MASK) { 942 case M_ALL: 943 if (pat == patend) 944 return(1); 945 if (*name == BG_EOS) 946 return 0; 947 nextn = name + 1; 948 nextp = pat - 1; 949 break; 950 case M_ONE: 951 /* since * matches leftmost-shortest first * 952 * if we encounter the EOS then backtracking * 953 * will not help, so we can exit early here. */ 954 if (*name++ == BG_EOS) 955 return 0; 956 break; 957 case M_SET: 958 ok = 0; 959 /* since * matches leftmost-shortest first * 960 * if we encounter the EOS then backtracking * 961 * will not help, so we can exit early here. */ 962 if ((k = *name++) == BG_EOS) 963 return 0; 964 if ((negate_range = ((*pat & M_MASK) == M_NOT)) != BG_EOS) 965 ++pat; 966 while (((c = *pat++) & M_MASK) != M_END) 967 if ((*pat & M_MASK) == M_RNG) { 968 if (nocase) { 969 if (tolower(c) <= tolower(k) && tolower(k) <= tolower(pat[1])) 970 ok = 1; 971 } else { 972 if (c <= k && k <= pat[1]) 973 ok = 1; 974 } 975 pat += 2; 976 } else if (nocase ? (tolower(c) == tolower(k)) : (c == k)) 977 ok = 1; 978 if (ok == negate_range) 979 goto fail; 980 break; 981 default: 982 k = *name++; 983 if (nocase ? (tolower(k) != tolower(c)) : (k != c)) 984 goto fail; 985 break; 986 } 987 } 988 if (*name == BG_EOS) 989 return 1; 990 991 fail: 992 if (nextn) { 993 pat = nextp; 994 name = nextn; 995 goto redo; 996 } 997 return 0; 998 } 999 1000 /* Free allocated data belonging to a glob_t structure. */ 1001 void 1002 bsd_globfree(glob_t *pglob) 1003 { 1004 int i; 1005 char **pp; 1006 1007 if (pglob->gl_pathv != NULL) { 1008 pp = pglob->gl_pathv + pglob->gl_offs; 1009 for (i = pglob->gl_pathc; i--; ++pp) 1010 if (*pp) 1011 Safefree(*pp); 1012 Safefree(pglob->gl_pathv); 1013 pglob->gl_pathv = NULL; 1014 } 1015 } 1016 1017 static DIR * 1018 g_opendir(Char *str, glob_t *pglob) 1019 { 1020 char buf[MAXPATHLEN]; 1021 1022 if (!*str) { 1023 my_strlcpy(buf, ".", sizeof(buf)); 1024 } else { 1025 if (g_Ctoc(str, buf, sizeof(buf))) 1026 return(NULL); 1027 } 1028 1029 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 1030 return((DIR*)(*pglob->gl_opendir)(buf)); 1031 1032 return(PerlDir_open(buf)); 1033 } 1034 1035 static int 1036 g_lstat(Char *fn, Stat_t *sb, glob_t *pglob) 1037 { 1038 char buf[MAXPATHLEN]; 1039 1040 if (g_Ctoc(fn, buf, sizeof(buf))) 1041 return(-1); 1042 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 1043 return((*pglob->gl_lstat)(buf, sb)); 1044 #ifdef HAS_LSTAT 1045 return(PerlLIO_lstat(buf, sb)); 1046 #else 1047 return(PerlLIO_stat(buf, sb)); 1048 #endif /* HAS_LSTAT */ 1049 } 1050 1051 static int 1052 g_stat(Char *fn, Stat_t *sb, glob_t *pglob) 1053 { 1054 char buf[MAXPATHLEN]; 1055 1056 if (g_Ctoc(fn, buf, sizeof(buf))) 1057 return(-1); 1058 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 1059 return((*pglob->gl_stat)(buf, sb)); 1060 return(PerlLIO_stat(buf, sb)); 1061 } 1062 1063 static Char * 1064 g_strchr(Char *str, int ch) 1065 { 1066 do { 1067 if (*str == ch) 1068 return (str); 1069 } while (*str++); 1070 return (NULL); 1071 } 1072 1073 static int 1074 g_Ctoc(const Char *str, char *buf, STRLEN len) 1075 { 1076 while (len--) { 1077 if ((*buf++ = (char)*str++) == BG_EOS) 1078 return (0); 1079 } 1080 return (1); 1081 } 1082 1083 #ifdef GLOB_DEBUG 1084 static void 1085 qprintf(const char *str, Char *s) 1086 { 1087 Char *p; 1088 1089 (void)printf("%s:\n", str); 1090 for (p = s; *p; p++) 1091 (void)printf("%c", CHAR(*p)); 1092 (void)printf("\n"); 1093 for (p = s; *p; p++) 1094 (void)printf("%c", *p & M_PROTECT ? '"' : ' '); 1095 (void)printf("\n"); 1096 for (p = s; *p; p++) 1097 (void)printf("%c", ismeta(*p) ? '_' : ' '); 1098 (void)printf("\n"); 1099 } 1100 #endif /* GLOB_DEBUG */ 1101