1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)glob.c 5.18 (Berkeley) 12/04/92"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 /* 16 * glob(3) -- a superset of the one defined in POSIX 1003.2. 17 * 18 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 19 * 20 * Optional extra services, controlled by flags not defined by POSIX: 21 * 22 * GLOB_QUOTE: 23 * Escaping convention: \ inhibits any special meaning the following 24 * character might have (except \ at end of string is retained). 25 * GLOB_MAGCHAR: 26 * Set in gl_flags if pattern contained a globbing character. 27 * GLOB_NOMAGIC: 28 * Same as GLOB_NOCHECK, but it will only append pattern if it did 29 * not contain any magic characters. [Used in csh style globbing] 30 * GLOB_ALTDIRFUNC: 31 * Use alternately specified directory access functions. 32 * gl_matchc: 33 * Number of matches in the current invocation of glob. 34 */ 35 36 #include <sys/param.h> 37 #include <sys/stat.h> 38 #include <dirent.h> 39 #include <glob.h> 40 #include <ctype.h> 41 #include <errno.h> 42 #include <string.h> 43 #include <stdio.h> 44 #include <stdlib.h> 45 46 #define DOLLAR '$' 47 #define DOT '.' 48 #define EOS '\0' 49 #define LBRACKET '[' 50 #define NOT '!' 51 #define QUESTION '?' 52 #define QUOTE '\\' 53 #define RANGE '-' 54 #define RBRACKET ']' 55 #define SEP '/' 56 #define STAR '*' 57 #define TILDE '~' 58 #define UNDERSCORE '_' 59 60 #define M_QUOTE 0x8000 61 #define M_PROTECT 0x4000 62 #define M_MASK 0xffff 63 #define M_ASCII 0x00ff 64 65 #define CHAR(c) ((c)&M_ASCII) 66 #define META(c) ((c)|M_QUOTE) 67 #define M_ALL META('*') 68 #define M_END META(']') 69 #define M_NOT META('!') 70 #define M_ONE META('?') 71 #define M_RNG META('-') 72 #define M_SET META('[') 73 #define ismeta(c) (((c)&M_QUOTE) != 0) 74 75 typedef u_short Char; 76 77 static int compare __P((const void *, const void *)); 78 static void g_Ctoc __P((Char *, char *)); 79 static int g_lstat __P((Char *, struct stat *, glob_t *)); 80 static DIR *g_opendir __P((Char *, glob_t *)); 81 static Char *g_strchr __P((Char *, int)); 82 static int g_stat __P((Char *, struct stat *, glob_t *)); 83 static int glob1 __P((Char *, glob_t *)); 84 static int glob2 __P((Char *, Char *, Char *, glob_t *)); 85 static int glob3 __P((Char *, Char *, Char *, Char *, glob_t *)); 86 static int globextend __P((Char *, glob_t *)); 87 static int match __P((Char *, Char *, Char *)); 88 #ifdef DEBUG 89 static void qprintf __P((Char *)); 90 #endif 91 92 /* 93 * The main glob() routine: compiles the pattern (optionally processing 94 * quotes), calls glob1() to do the real pattern matching, and finally 95 * sorts the list (unless unsorted operation is requested). Returns 0 96 * if things went well, nonzero if errors occurred. It is not an error 97 * to find no matches. 98 */ 99 glob(pattern, flags, errfunc, pglob) 100 const char *pattern; 101 int flags, (*errfunc) __P((char *, int)); 102 glob_t *pglob; 103 { 104 const u_char *compilepat, *patnext; 105 int c, err, oldpathc; 106 Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1]; 107 108 patnext = (u_char *) pattern; 109 if (!(flags & GLOB_APPEND)) { 110 pglob->gl_pathc = 0; 111 pglob->gl_pathv = NULL; 112 if (!(flags & GLOB_DOOFFS)) 113 pglob->gl_offs = 0; 114 } 115 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 116 pglob->gl_errfunc = errfunc; 117 oldpathc = pglob->gl_pathc; 118 pglob->gl_matchc = 0; 119 120 bufnext = patbuf; 121 bufend = bufnext + MAXPATHLEN; 122 compilebuf = bufnext; 123 compilepat = patnext; 124 if (flags & GLOB_QUOTE) { 125 /* Protect the quoted characters. */ 126 while (bufnext < bufend && (c = *patnext++) != EOS) 127 if (c == QUOTE) { 128 if ((c = *patnext++) == EOS) { 129 c = QUOTE; 130 --patnext; 131 } 132 *bufnext++ = c | M_PROTECT; 133 } 134 else 135 *bufnext++ = c; 136 } 137 else 138 while (bufnext < bufend && (c = *patnext++) != EOS) 139 *bufnext++ = c; 140 *bufnext = EOS; 141 142 bufnext = patbuf; 143 qpatnext = patbuf; 144 /* We don't need to check for buffer overflow any more. */ 145 while ((c = *qpatnext++) != EOS) { 146 switch (c) { 147 case LBRACKET: 148 c = *qpatnext; 149 if (c == NOT) 150 ++qpatnext; 151 if (*qpatnext == EOS || 152 g_strchr(qpatnext+1, RBRACKET) == NULL) { 153 *bufnext++ = LBRACKET; 154 if (c == NOT) 155 --qpatnext; 156 break; 157 } 158 *bufnext++ = M_SET; 159 if (c == NOT) 160 *bufnext++ = M_NOT; 161 c = *qpatnext++; 162 do { 163 *bufnext++ = CHAR(c); 164 if (*qpatnext == RANGE && 165 (c = qpatnext[1]) != RBRACKET) { 166 *bufnext++ = M_RNG; 167 *bufnext++ = CHAR(c); 168 qpatnext += 2; 169 } 170 } while ((c = *qpatnext++) != RBRACKET); 171 pglob->gl_flags |= GLOB_MAGCHAR; 172 *bufnext++ = M_END; 173 break; 174 case QUESTION: 175 pglob->gl_flags |= GLOB_MAGCHAR; 176 *bufnext++ = M_ONE; 177 break; 178 case STAR: 179 pglob->gl_flags |= GLOB_MAGCHAR; 180 /* collapse adjacent stars to one, 181 * to avoid exponential behavior 182 */ 183 if (bufnext == patbuf || bufnext[-1] != M_ALL) 184 *bufnext++ = M_ALL; 185 break; 186 default: 187 *bufnext++ = CHAR(c); 188 break; 189 } 190 } 191 *bufnext = EOS; 192 #ifdef DEBUG 193 qprintf(patbuf); 194 #endif 195 196 if ((err = glob1(patbuf, pglob)) != 0) 197 return(err); 198 199 /* 200 * If there was no match we are going to append the pattern 201 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified 202 * and the pattern did not contain any magic characters 203 * GLOB_NOMAGIC is there just for compatibility with csh. 204 */ 205 if (pglob->gl_pathc == oldpathc && 206 ((flags & GLOB_NOCHECK) || 207 ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) { 208 if (!(flags & GLOB_QUOTE)) { 209 Char *dp = compilebuf; 210 const u_char *sp = compilepat; 211 while (*dp++ = *sp++); 212 } 213 else { 214 /* 215 * Copy pattern, interpreting quotes; this is slightly 216 * different than the interpretation of quotes above 217 * -- which should prevail? 218 */ 219 while (*compilepat != EOS) { 220 if (*compilepat == QUOTE) { 221 if (*++compilepat == EOS) 222 --compilepat; 223 } 224 *compilebuf++ = (u_char)*compilepat++; 225 } 226 *compilebuf = EOS; 227 } 228 return(globextend(patbuf, pglob)); 229 } else if (!(flags & GLOB_NOSORT)) 230 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, 231 pglob->gl_pathc - oldpathc, sizeof(char *), compare); 232 return(0); 233 } 234 235 static int 236 compare(p, q) 237 const void *p, *q; 238 { 239 return(strcmp(*(char **)p, *(char **)q)); 240 } 241 242 static 243 glob1(pattern, pglob) 244 Char *pattern; 245 glob_t *pglob; 246 { 247 Char pathbuf[MAXPATHLEN+1]; 248 249 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ 250 if (*pattern == EOS) 251 return(0); 252 return(glob2(pathbuf, pathbuf, pattern, pglob)); 253 } 254 255 /* 256 * The functions glob2 and glob3 are mutually recursive; there is one level 257 * of recursion for each segment in the pattern that contains one or more 258 * meta characters. 259 */ 260 static 261 glob2(pathbuf, pathend, pattern, pglob) 262 Char *pathbuf, *pathend, *pattern; 263 glob_t *pglob; 264 { 265 struct stat sb; 266 Char *p, *q; 267 int anymeta; 268 269 /* 270 * Loop over pattern segments until end of pattern or until 271 * segment with meta character found. 272 */ 273 for (anymeta = 0;;) { 274 if (*pattern == EOS) { /* End of pattern? */ 275 *pathend = EOS; 276 if (g_lstat(pathbuf, &sb, pglob)) 277 return(0); 278 279 if (((pglob->gl_flags & GLOB_MARK) && 280 pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) 281 || (S_ISLNK(sb.st_mode) && 282 (g_stat(pathbuf, &sb, pglob) == 0) && 283 S_ISDIR(sb.st_mode)))) { 284 *pathend++ = SEP; 285 *pathend = EOS; 286 } 287 ++pglob->gl_matchc; 288 return(globextend(pathbuf, pglob)); 289 } 290 291 /* Find end of next segment, copy tentatively to pathend. */ 292 q = pathend; 293 p = pattern; 294 while (*p != EOS && *p != SEP) { 295 if (ismeta(*p)) 296 anymeta = 1; 297 *q++ = *p++; 298 } 299 300 if (!anymeta) { /* No expansion, do next segment. */ 301 pathend = q; 302 pattern = p; 303 while (*pattern == SEP) 304 *pathend++ = *pattern++; 305 } else /* Need expansion, recurse. */ 306 return(glob3(pathbuf, pathend, pattern, p, pglob)); 307 } 308 /* NOTREACHED */ 309 } 310 311 static 312 glob3(pathbuf, pathend, pattern, restpattern, pglob) 313 Char *pathbuf, *pathend, *pattern, *restpattern; 314 glob_t *pglob; 315 { 316 register struct dirent *dp; 317 struct dirent *(*readdirfunc)(); 318 DIR *dirp; 319 int len, err; 320 char buf[MAXPATHLEN]; 321 322 *pathend = EOS; 323 errno = 0; 324 325 if ((dirp = g_opendir(pathbuf, pglob)) == NULL) { 326 /* TODO: don't call for ENOENT or ENOTDIR? */ 327 if (pglob->gl_errfunc) { 328 g_Ctoc(pathbuf, buf); 329 if (pglob->gl_errfunc(buf, errno) || 330 pglob->gl_flags & GLOB_ERR) 331 return (GLOB_ABEND); 332 } 333 return(0); 334 } 335 336 err = 0; 337 338 /* Search directory for matching names. */ 339 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 340 readdirfunc = pglob->gl_readdir; 341 else 342 readdirfunc = readdir; 343 while ((dp = (*readdirfunc)(dirp))) { 344 register u_char *sc; 345 register Char *dc; 346 347 /* Initial DOT must be matched literally. */ 348 if (dp->d_name[0] == DOT && *pattern != DOT) 349 continue; 350 for (sc = (u_char *) dp->d_name, dc = pathend; 351 *dc++ = *sc++;); 352 if (!match(pathend, pattern, restpattern)) { 353 *pathend = EOS; 354 continue; 355 } 356 err = glob2(pathbuf, --dc, restpattern, pglob); 357 if (err) 358 break; 359 } 360 361 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 362 (*pglob->gl_closedir)(dirp); 363 else 364 closedir(dirp); 365 return(err); 366 } 367 368 369 /* 370 * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 371 * add the new item, and update gl_pathc. 372 * 373 * This assumes the BSD realloc, which only copies the block when its size 374 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 375 * behavior. 376 * 377 * Return 0 if new item added, error code if memory couldn't be allocated. 378 * 379 * Invariant of the glob_t structure: 380 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 381 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 382 */ 383 static int 384 globextend(path, pglob) 385 Char *path; 386 glob_t *pglob; 387 { 388 register char **pathv; 389 register int i; 390 u_int newsize; 391 char *copy; 392 Char *p; 393 394 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 395 pathv = (char **)realloc((char *)pglob->gl_pathv, newsize); 396 if (pathv == NULL) 397 return(GLOB_NOSPACE); 398 399 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 400 /* first time around -- clear initial gl_offs items */ 401 pathv += pglob->gl_offs; 402 for (i = pglob->gl_offs; --i >= 0; ) 403 *--pathv = NULL; 404 } 405 pglob->gl_pathv = pathv; 406 407 for (p = path; *p++;); 408 if ((copy = malloc(p - path)) != NULL) { 409 g_Ctoc(path, copy); 410 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 411 } 412 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 413 return(copy == NULL ? GLOB_NOSPACE : 0); 414 } 415 416 417 /* 418 * pattern matching function for filenames. Each occurrence of the * 419 * pattern causes a recursion level. 420 */ 421 static 422 match(name, pat, patend) 423 register Char *name, *pat, *patend; 424 { 425 int ok, negate_range; 426 Char c, k; 427 428 while (pat < patend) { 429 c = *pat++; 430 switch (c & M_MASK) { 431 case M_ALL: 432 if (pat == patend) 433 return(1); 434 do 435 if (match(name, pat, patend)) 436 return(1); 437 while (*name++ != EOS); 438 return(0); 439 case M_ONE: 440 if (*name++ == EOS) 441 return(0); 442 break; 443 case M_SET: 444 ok = 0; 445 if ((k = *name++) == EOS) 446 return(0); 447 if (negate_range = ((*pat & M_MASK) == M_NOT)) 448 ++pat; 449 while (((c = *pat++) & M_MASK) != M_END) 450 if ((*pat & M_MASK) == M_RNG) { 451 if (c <= k && k <= pat[1]) 452 ok = 1; 453 pat += 2; 454 } else if (c == k) 455 ok = 1; 456 if (ok == negate_range) 457 return(0); 458 break; 459 default: 460 if (*name++ != c) 461 return(0); 462 break; 463 } 464 } 465 return(*name == EOS); 466 } 467 468 /* Free allocated data belonging to a glob_t structure. */ 469 void 470 globfree(pglob) 471 glob_t *pglob; 472 { 473 register int i; 474 register char **pp; 475 476 if (pglob->gl_pathv != NULL) { 477 pp = pglob->gl_pathv + pglob->gl_offs; 478 for (i = pglob->gl_pathc; i--; ++pp) 479 if (*pp) 480 free(*pp); 481 free(pglob->gl_pathv); 482 } 483 } 484 485 static DIR * 486 g_opendir(str, pglob) 487 register Char *str; 488 glob_t *pglob; 489 { 490 char buf[MAXPATHLEN]; 491 char *dirname; 492 493 if (!*str) 494 strcpy(buf, "."); 495 else 496 g_Ctoc(str, buf); 497 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 498 return((*pglob->gl_opendir)(buf)); 499 return(opendir(buf)); 500 } 501 502 static int 503 g_lstat(fn, sb, pglob) 504 register Char *fn; 505 struct stat *sb; 506 glob_t *pglob; 507 { 508 char buf[MAXPATHLEN]; 509 510 g_Ctoc(fn, buf); 511 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 512 return((*pglob->gl_lstat)(buf, sb)); 513 return(lstat(buf, sb)); 514 } 515 516 static int 517 g_stat(fn, sb, pglob) 518 register Char *fn; 519 struct stat *sb; 520 glob_t *pglob; 521 { 522 char buf[MAXPATHLEN]; 523 524 g_Ctoc(fn, buf); 525 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 526 return((*pglob->gl_stat)(buf, sb)); 527 return(stat(buf, sb)); 528 } 529 530 static Char * 531 g_strchr(str, ch) 532 Char *str; 533 int ch; 534 { 535 do { 536 if (*str == ch) 537 return (str); 538 } while (*str++); 539 return (NULL); 540 } 541 542 static void 543 g_Ctoc(str, buf) 544 register Char *str; 545 char *buf; 546 { 547 register char *dc; 548 549 for (dc = buf; *dc++ = *str++;); 550 } 551 552 #ifdef DEBUG 553 static void 554 qprintf(s) 555 register Char *s; 556 { 557 register Char *p; 558 559 for (p = s; *p; p++) 560 (void)printf("%c", CHAR(*p)); 561 (void)printf("\n"); 562 for (p = s; *p; p++) 563 (void)printf("%c", *p & M_PROTECT ? '"' : ' '); 564 (void)printf("\n"); 565 for (p = s; *p; p++) 566 (void)printf("%c", ismeta(*p) ? '_' : ' '); 567 (void)printf("\n"); 568 } 569 #endif 570