1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)glob.c 5.11 (Berkeley) 06/23/91"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 /* 16 * glob(3) -- a superset of the one defined in POSIX 1003.2. 17 * 18 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 19 * 20 * Optional extra services, controlled by flags not defined by POSIX: 21 * 22 * GLOB_QUOTE: 23 * Escaping convention: \ inhibits any special meaning the following 24 * character might have (except \ at end of string is retained). 25 * GLOB_MAGCHAR: 26 * Set in gl_flags if pattern contained a globbing character. 27 * gl_matchc: 28 * Number of matches in the current invocation of glob. 29 */ 30 31 #include <sys/cdefs.h> 32 #include <sys/param.h> 33 #include <sys/stat.h> 34 #include <dirent.h> 35 #include <glob.h> 36 #include <ctype.h> 37 #include <errno.h> 38 #include <string.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 42 #define DOLLAR '$' 43 #define DOT '.' 44 #define EOS '\0' 45 #define LBRACKET '[' 46 #define NOT '!' 47 #define QUESTION '?' 48 #define QUOTE '\\' 49 #define RANGE '-' 50 #define RBRACKET ']' 51 #define SEP '/' 52 #define STAR '*' 53 #define TILDE '~' 54 #define UNDERSCORE '_' 55 56 #define M_QUOTE 0x8000 57 #define M_PROTECT 0x4000 58 #define M_MASK 0xffff 59 60 #define META(c) ((c)|M_QUOTE) 61 #define M_ALL META('*') 62 #define M_END META(']') 63 #define M_NOT META('!') 64 #define M_ONE META('?') 65 #define M_RNG META('-') 66 #define M_SET META('[') 67 #define ismeta(c) (((c)&M_QUOTE) != 0) 68 69 typedef u_short Char; 70 71 static int compare __P((const void *, const void *)); 72 static void g_Ctoc __P((Char *, char *)); 73 static int g_lstat __P((Char *, struct stat *)); 74 static DIR *g_opendir __P((Char *)); 75 static Char *g_strchr __P((Char *, int)); 76 static int g_stat __P((Char *, struct stat *)); 77 static int glob1 __P((Char *, glob_t *)); 78 static int glob2 __P((Char *, Char *, Char *, glob_t *)); 79 static int glob3 __P((Char *, Char *, Char *, Char *, glob_t *)); 80 static int globextend __P((Char *, glob_t *)); 81 static int match __P((Char *, Char *, Char *)); 82 #ifdef DEBUG 83 static void qprintf __P((Char *)); 84 #endif 85 86 /* 87 * The main glob() routine: compiles the pattern (optionally processing 88 * quotes), calls glob1() to do the real pattern matching, and finally 89 * sorts the list (unless unsorted operation is requested). Returns 0 90 * if things went well, nonzero if errors occurred. It is not an error 91 * to find no matches. 92 */ 93 glob(pattern, flags, errfunc, pglob) 94 const char *pattern; 95 int flags, (*errfunc) __P((char *, int)); 96 glob_t *pglob; 97 { 98 const char *compilepat, *patnext; 99 int c, err, oldpathc; 100 Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1]; 101 102 patnext = pattern; 103 if (!(flags & GLOB_APPEND)) { 104 pglob->gl_pathc = 0; 105 pglob->gl_pathv = NULL; 106 if (!(flags & GLOB_DOOFFS)) 107 pglob->gl_offs = 0; 108 } 109 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 110 pglob->gl_errfunc = errfunc; 111 oldpathc = pglob->gl_pathc; 112 pglob->gl_matchc = 0; 113 114 bufnext = patbuf; 115 bufend = bufnext + MAXPATHLEN; 116 compilebuf = bufnext; 117 compilepat = patnext; 118 if (flags & GLOB_QUOTE) { 119 /* Protect the quoted characters. */ 120 while (bufnext < bufend && (c = *patnext++) != EOS) 121 if (c == QUOTE) { 122 if ((c = *patnext++) == EOS) { 123 c = QUOTE; 124 --patnext; 125 } 126 *bufnext++ = c | M_PROTECT; 127 } 128 else 129 *bufnext++ = c; 130 } 131 else 132 while (bufnext < bufend && (c = *patnext++) != EOS) 133 *bufnext++ = c; 134 *bufnext = EOS; 135 136 bufnext = patbuf; 137 qpatnext = patbuf; 138 /* We don't need to check for buffer overflow any more. */ 139 while ((c = *qpatnext++) != EOS) { 140 switch (c) { 141 case LBRACKET: 142 pglob->gl_flags |= GLOB_MAGCHAR; 143 c = *qpatnext; 144 if (c == NOT) 145 ++qpatnext; 146 if (*qpatnext == EOS || 147 g_strchr(qpatnext+1, RBRACKET) == NULL) { 148 *bufnext++ = LBRACKET; 149 if (c == NOT) 150 --qpatnext; 151 break; 152 } 153 *bufnext++ = M_SET; 154 if (c == NOT) 155 *bufnext++ = M_NOT; 156 c = *qpatnext++; 157 do { 158 *bufnext++ = c; 159 if (*qpatnext == RANGE && 160 (c = qpatnext[1]) != RBRACKET) { 161 *bufnext++ = M_RNG; 162 *bufnext++ = c; 163 qpatnext += 2; 164 } 165 } while ((c = *qpatnext++) != RBRACKET); 166 *bufnext++ = M_END; 167 break; 168 case QUESTION: 169 pglob->gl_flags |= GLOB_MAGCHAR; 170 *bufnext++ = M_ONE; 171 break; 172 case STAR: 173 pglob->gl_flags |= GLOB_MAGCHAR; 174 *bufnext++ = M_ALL; 175 break; 176 default: 177 *bufnext++ = c; 178 break; 179 } 180 } 181 *bufnext = EOS; 182 #ifdef DEBUG 183 qprintf(patbuf); 184 #endif 185 186 if ((err = glob1(patbuf, pglob)) != 0) 187 return(err); 188 189 if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) { 190 if (!(flags & GLOB_QUOTE)) { 191 Char *dp = compilebuf; 192 const char *sp = compilepat; 193 while (*dp++ = (u_char)*sp++); 194 } 195 else { 196 /* 197 * Copy pattern, interpreting quotes; this is slightly 198 * different than the interpretation of quotes above 199 * -- which should prevail? 200 */ 201 while (*compilepat != EOS) { 202 if (*compilepat == QUOTE) { 203 if (*++compilepat == EOS) 204 --compilepat; 205 } 206 *compilebuf++ = (u_char)*compilepat++; 207 } 208 *compilebuf = EOS; 209 } 210 return(globextend(patbuf, pglob)); 211 } else if (!(flags & GLOB_NOSORT)) 212 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, 213 pglob->gl_pathc - oldpathc, sizeof(char *), compare); 214 return(0); 215 } 216 217 static int 218 compare(p, q) 219 const void *p, *q; 220 { 221 return(strcmp(*(char **)p, *(char **)q)); 222 } 223 224 static 225 glob1(pattern, pglob) 226 Char *pattern; 227 glob_t *pglob; 228 { 229 Char pathbuf[MAXPATHLEN+1]; 230 231 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ 232 if (*pattern == EOS) 233 return(0); 234 return(glob2(pathbuf, pathbuf, pattern, pglob)); 235 } 236 237 /* 238 * The functions glob2 and glob3 are mutually recursive; there is one level 239 * of recursion for each segment in the pattern that contains one or more 240 * meta characters. 241 */ 242 static 243 glob2(pathbuf, pathend, pattern, pglob) 244 Char *pathbuf, *pathend, *pattern; 245 glob_t *pglob; 246 { 247 struct stat sb; 248 Char *p, *q; 249 int anymeta; 250 251 /* 252 * Loop over pattern segments until end of pattern or until 253 * segment with meta character found. 254 */ 255 for (anymeta = 0;;) { 256 if (*pattern == EOS) { /* End of pattern? */ 257 *pathend = EOS; 258 if (g_stat(pathbuf, &sb)) 259 return(0); 260 261 if (((pglob->gl_flags & GLOB_MARK) && 262 pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) 263 || (S_ISLNK(sb.st_mode) && 264 (g_stat(pathbuf, &sb) == 0) && 265 S_ISDIR(sb.st_mode)))) { 266 *pathend++ = SEP; 267 *pathend = EOS; 268 } 269 ++pglob->gl_matchc; 270 return(globextend(pathbuf, pglob)); 271 } 272 273 /* Find end of next segment, copy tentatively to pathend. */ 274 q = pathend; 275 p = pattern; 276 while (*p != EOS && *p != SEP) { 277 if (ismeta(*p)) 278 anymeta = 1; 279 *q++ = *p++; 280 } 281 282 if (!anymeta) { /* No expansion, do next segment. */ 283 pathend = q; 284 pattern = p; 285 while (*pattern == SEP) 286 *pathend++ = *pattern++; 287 } else /* Need expansion, recurse. */ 288 return(glob3(pathbuf, pathend, pattern, p, pglob)); 289 } 290 /* NOTREACHED */ 291 } 292 293 static 294 glob3(pathbuf, pathend, pattern, restpattern, pglob) 295 Char *pathbuf, *pathend, *pattern, *restpattern; 296 glob_t *pglob; 297 { 298 register struct dirent *dp; 299 DIR *dirp; 300 int len, err; 301 302 *pathend = EOS; 303 errno = 0; 304 305 if (!(dirp = g_opendir(pathbuf))) 306 /* TODO: don't call for ENOENT or ENOTDIR? */ 307 if (pglob->gl_errfunc && 308 (*pglob->gl_errfunc)(pathbuf, errno) || 309 (pglob->gl_flags & GLOB_ERR)) 310 return(GLOB_ABEND); 311 else 312 return(0); 313 314 err = 0; 315 316 /* Search directory for matching names. */ 317 while ((dp = readdir(dirp))) { 318 register char *sc; 319 register Char *dc; 320 321 /* Initial DOT must be matched literally. */ 322 if (dp->d_name[0] == DOT && *pattern != DOT) 323 continue; 324 for (sc = dp->d_name, dc = pathend; 325 *dc++ = (u_char)*sc++;); 326 if (!match(pathend, pattern, restpattern)) { 327 *pathend = EOS; 328 continue; 329 } 330 err = glob2(pathbuf, --dc, restpattern, pglob); 331 if (err) 332 break; 333 } 334 335 /* TODO: check error from readdir? */ 336 (void)closedir(dirp); 337 return(err); 338 } 339 340 341 /* 342 * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 343 * add the new item, and update gl_pathc. 344 * 345 * This assumes the BSD realloc, which only copies the block when its size 346 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 347 * behavior. 348 * 349 * Return 0 if new item added, error code if memory couldn't be allocated. 350 * 351 * Invariant of the glob_t structure: 352 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 353 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 354 */ 355 static int 356 globextend(path, pglob) 357 Char *path; 358 glob_t *pglob; 359 { 360 register char **pathv; 361 register int i; 362 u_int newsize; 363 char *copy; 364 Char *p; 365 366 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 367 pathv = (char **)realloc((char *)pglob->gl_pathv, newsize); 368 if (pathv == NULL) 369 return(GLOB_NOSPACE); 370 371 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 372 /* first time around -- clear initial gl_offs items */ 373 pathv += pglob->gl_offs; 374 for (i = pglob->gl_offs; --i >= 0; ) 375 *--pathv = NULL; 376 } 377 pglob->gl_pathv = pathv; 378 379 for (p = path; *p++;); 380 if ((copy = malloc(p - path)) != NULL) { 381 g_Ctoc(path, copy); 382 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 383 } 384 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 385 return(copy == NULL ? GLOB_NOSPACE : 0); 386 } 387 388 389 /* 390 * pattern matching function for filenames. Each occurrence of the * 391 * pattern causes a recursion level. 392 */ 393 static 394 match(name, pat, patend) 395 register Char *name, *pat, *patend; 396 { 397 int ok, negate_range; 398 Char c, k; 399 400 while (pat < patend) { 401 c = *pat++; 402 switch (c & M_MASK) { 403 case M_ALL: 404 if (pat == patend) 405 return(1); 406 for (; *name != EOS; ++name) 407 if (match(name, pat, patend)) 408 return(1); 409 return(0); 410 case M_ONE: 411 if (*name++ == EOS) 412 return(0); 413 break; 414 case M_SET: 415 ok = 0; 416 k = *name++; 417 if (negate_range = ((*pat & M_MASK) == M_NOT)) 418 ++pat; 419 while (((c = *pat++) & M_MASK) != M_END) 420 if ((*pat & M_MASK) == M_RNG) { 421 if (c <= k && k <= pat[1]) 422 ok = 1; 423 pat += 2; 424 } else if (c == k) 425 ok = 1; 426 if (ok == negate_range) 427 return(0); 428 break; 429 default: 430 if (*name++ != c) 431 return(0); 432 break; 433 } 434 } 435 return(*name == EOS); 436 } 437 438 /* Free allocated data belonging to a glob_t structure. */ 439 void 440 globfree(pglob) 441 glob_t *pglob; 442 { 443 register int i; 444 register char **pp; 445 446 if (pglob->gl_pathv != NULL) { 447 pp = pglob->gl_pathv + pglob->gl_offs; 448 for (i = pglob->gl_pathc; i--; ++pp) 449 if (*pp) 450 free(*pp); 451 free(pglob->gl_pathv); 452 } 453 } 454 455 static DIR * 456 g_opendir(str) 457 register Char *str; 458 { 459 char buf[MAXPATHLEN]; 460 461 if (!*str) 462 return(opendir(".")); 463 g_Ctoc(str, buf); 464 return(opendir(buf)); 465 } 466 467 static int 468 g_lstat(fn, sb) 469 register Char *fn; 470 struct stat *sb; 471 { 472 char buf[MAXPATHLEN]; 473 474 g_Ctoc(fn, buf); 475 return(lstat(buf, sb)); 476 } 477 478 static int 479 g_stat(fn, sb) 480 register Char *fn; 481 struct stat *sb; 482 { 483 char buf[MAXPATHLEN]; 484 485 g_Ctoc(fn, buf); 486 return(stat(buf, sb)); 487 } 488 489 static Char * 490 g_strchr(str, ch) 491 Char *str; 492 int ch; 493 { 494 do { 495 if (*str == ch) 496 return (str); 497 } while (*str++); 498 return (NULL); 499 } 500 501 static void 502 g_Ctoc(str, buf) 503 register Char *str; 504 char *buf; 505 { 506 register char *dc; 507 508 for (dc = buf; *dc++ = *str++;); 509 } 510 511 #ifdef DEBUG 512 static void 513 qprintf(s) 514 register Char *s; 515 { 516 register Char *p; 517 518 for (p = s; *p; p++) 519 (void)printf("%c", *p & 0xff); 520 (void)printf("\n"); 521 for (p = s; *p; p++) 522 (void)printf("%c", *p & M_PROTECT ? '"' : ' '); 523 (void)printf("\n"); 524 for (p = s; *p; p++) 525 (void)printf("%c", *p & M_META ? '_' : ' '); 526 (void)printf("\n"); 527 } 528 #endif 529