1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)glob.c 5.10 (Berkeley) 06/17/91"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 /* 16 * glob(3) -- a superset of the one defined in POSIX 1003.2. 17 * 18 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 19 * 20 * Optional extra services, controlled by flags not defined by POSIX: 21 * 22 * GLOB_QUOTE: 23 * Escaping convention: \ inhibits any special meaning the following 24 * character might have (except \ at end of string is retained). 25 * GLOB_MAGCHAR: 26 * Set in gl_flags if pattern contained a globbing character. 27 * gl_matchc: 28 * Number of matches in the current invocation of glob. 29 */ 30 31 #include <sys/cdefs.h> 32 #include <sys/param.h> 33 #include <sys/stat.h> 34 #include <dirent.h> 35 #include <glob.h> 36 #include <ctype.h> 37 #include <errno.h> 38 #include <string.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 42 #define DOLLAR '$' 43 #define DOT '.' 44 #define EOS '\0' 45 #define LBRACKET '[' 46 #define NOT '!' 47 #define QUESTION '?' 48 #define QUOTE '\\' 49 #define RANGE '-' 50 #define RBRACKET ']' 51 #define SEP '/' 52 #define STAR '*' 53 #define TILDE '~' 54 #define UNDERSCORE '_' 55 56 #define M_QUOTE 0x8000 57 #define M_PROTECT 0x4000 58 #define M_MASK 0xffff 59 60 #define META(c) ((c)|M_QUOTE) 61 #define M_ALL META('*') 62 #define M_END META(']') 63 #define M_NOT META('!') 64 #define M_ONE META('?') 65 #define M_RNG META('-') 66 #define M_SET META('[') 67 #define ismeta(c) (((c)&M_QUOTE) != 0) 68 69 typedef u_short Char; 70 71 static int compare __P((const void *, const void *)); 72 static void g_Ctoc __P((Char *, char *)); 73 static int g_lstat __P((Char *, struct stat *)); 74 static DIR *g_opendir __P((Char *)); 75 static Char *g_strchr __P((Char *, int)); 76 static int g_stat __P((Char *, struct stat *)); 77 static int glob1 __P((Char *, glob_t *)); 78 static int glob2 __P((Char *, Char *, Char *, glob_t *)); 79 static int glob3 __P((Char *, Char *, Char *, Char *, glob_t *)); 80 static int globextend __P((Char *, glob_t *)); 81 static int match __P((Char *, Char *, Char *)); 82 #ifdef DEBUG 83 static void qprintf __P((Char *)); 84 #endif 85 86 /* 87 * The main glob() routine: compiles the pattern (optionally processing 88 * quotes), calls glob1() to do the real pattern matching, and finally 89 * sorts the list (unless unsorted operation is requested). Returns 0 90 * if things went well, nonzero if errors occurred. It is not an error 91 * to find no matches. 92 */ 93 glob(pattern, flags, errfunc, pglob) 94 const char *pattern; 95 int flags, (*errfunc) __P((char *, int)); 96 glob_t *pglob; 97 { 98 const char *compilepat, *patnext; 99 int err, oldpathc; 100 Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1]; 101 char c; 102 103 patnext = pattern; 104 if (!(flags & GLOB_APPEND)) { 105 pglob->gl_pathc = 0; 106 pglob->gl_pathv = NULL; 107 if (!(flags & GLOB_DOOFFS)) 108 pglob->gl_offs = 0; 109 } 110 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 111 pglob->gl_errfunc = errfunc; 112 oldpathc = pglob->gl_pathc; 113 pglob->gl_matchc = 0; 114 115 bufnext = patbuf; 116 bufend = bufnext + MAXPATHLEN; 117 compilebuf = bufnext; 118 compilepat = patnext; 119 if (flags & GLOB_QUOTE) { 120 /* Protect the quoted characters. */ 121 while (bufnext < bufend && (c = *patnext++) != EOS) 122 if (c == QUOTE) { 123 if ((c = *patnext++) == EOS) { 124 c = QUOTE; 125 --patnext; 126 } 127 *bufnext++ = c | M_PROTECT; 128 } 129 else 130 *bufnext++ = c; 131 } 132 else 133 while (bufnext < bufend && (c = *patnext++) != EOS) 134 *bufnext++ = c; 135 *bufnext = EOS; 136 137 bufnext = patbuf; 138 qpatnext = patbuf; 139 /* We don't need to check for buffer overflow any more. */ 140 while ((c = *qpatnext++) != EOS) { 141 switch (c) { 142 case LBRACKET: 143 pglob->gl_flags |= GLOB_MAGCHAR; 144 c = *qpatnext; 145 if (c == NOT) 146 ++qpatnext; 147 if (*qpatnext == EOS || 148 g_strchr(qpatnext+1, RBRACKET) == NULL) { 149 *bufnext++ = LBRACKET; 150 if (c == NOT) 151 --qpatnext; 152 break; 153 } 154 *bufnext++ = M_SET; 155 if (c == NOT) 156 *bufnext++ = M_NOT; 157 c = *qpatnext++; 158 do { 159 *bufnext++ = c; 160 if (*qpatnext == RANGE && 161 (c = qpatnext[1]) != RBRACKET) { 162 *bufnext++ = M_RNG; 163 *bufnext++ = c; 164 qpatnext += 2; 165 } 166 } while ((c = *qpatnext++) != RBRACKET); 167 *bufnext++ = M_END; 168 break; 169 case QUESTION: 170 pglob->gl_flags |= GLOB_MAGCHAR; 171 *bufnext++ = M_ONE; 172 break; 173 case STAR: 174 pglob->gl_flags |= GLOB_MAGCHAR; 175 *bufnext++ = M_ALL; 176 break; 177 default: 178 *bufnext++ = c; 179 break; 180 } 181 } 182 *bufnext = EOS; 183 #ifdef DEBUG 184 qprintf(patbuf); 185 #endif 186 187 if ((err = glob1(patbuf, pglob)) != 0) 188 return(err); 189 190 if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) { 191 if (!(flags & GLOB_QUOTE)) { 192 Char *dp = compilebuf; 193 const char *sp = compilepat; 194 while (*dp++ = (u_char)*sp++); 195 } 196 else { 197 /* 198 * Copy pattern, interpreting quotes; this is slightly 199 * different than the interpretation of quotes above 200 * -- which should prevail? 201 */ 202 while (*compilepat != EOS) { 203 if (*compilepat == QUOTE) { 204 if (*++compilepat == EOS) 205 --compilepat; 206 } 207 *compilebuf++ = (u_char)*compilepat++; 208 } 209 *compilebuf = EOS; 210 } 211 return(globextend(patbuf, pglob)); 212 } else if (!(flags & GLOB_NOSORT)) 213 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, 214 pglob->gl_pathc - oldpathc, sizeof(char *), compare); 215 return(0); 216 } 217 218 static int 219 compare(p, q) 220 const void *p, *q; 221 { 222 return(strcmp(*(char **)p, *(char **)q)); 223 } 224 225 static 226 glob1(pattern, pglob) 227 Char *pattern; 228 glob_t *pglob; 229 { 230 Char pathbuf[MAXPATHLEN+1]; 231 232 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ 233 if (*pattern == EOS) 234 return(0); 235 return(glob2(pathbuf, pathbuf, pattern, pglob)); 236 } 237 238 /* 239 * The functions glob2 and glob3 are mutually recursive; there is one level 240 * of recursion for each segment in the pattern that contains one or more 241 * meta characters. 242 */ 243 static 244 glob2(pathbuf, pathend, pattern, pglob) 245 Char *pathbuf, *pathend, *pattern; 246 glob_t *pglob; 247 { 248 struct stat sb; 249 Char *p, *q; 250 int anymeta; 251 252 /* 253 * Loop over pattern segments until end of pattern or until 254 * segment with meta character found. 255 */ 256 for (anymeta = 0;;) { 257 if (*pattern == EOS) { /* End of pattern? */ 258 *pathend = EOS; 259 if (g_stat(pathbuf, &sb)) 260 return(0); 261 262 if (((pglob->gl_flags & GLOB_MARK) && 263 pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) 264 || (S_ISLNK(sb.st_mode) && 265 (g_stat(pathbuf, &sb) == 0) && 266 S_ISDIR(sb.st_mode)))) { 267 *pathend++ = SEP; 268 *pathend = EOS; 269 } 270 ++pglob->gl_matchc; 271 return(globextend(pathbuf, pglob)); 272 } 273 274 /* Find end of next segment, copy tentatively to pathend. */ 275 q = pathend; 276 p = pattern; 277 while (*p != EOS && *p != SEP) { 278 if (ismeta(*p)) 279 anymeta = 1; 280 *q++ = *p++; 281 } 282 283 if (!anymeta) { /* No expansion, do next segment. */ 284 pathend = q; 285 pattern = p; 286 while (*pattern == SEP) 287 *pathend++ = *pattern++; 288 } else /* Need expansion, recurse. */ 289 return(glob3(pathbuf, pathend, pattern, p, pglob)); 290 } 291 /* NOTREACHED */ 292 } 293 294 static 295 glob3(pathbuf, pathend, pattern, restpattern, pglob) 296 Char *pathbuf, *pathend, *pattern, *restpattern; 297 glob_t *pglob; 298 { 299 register struct dirent *dp; 300 DIR *dirp; 301 int len, err; 302 303 *pathend = EOS; 304 errno = 0; 305 306 if (!(dirp = g_opendir(pathbuf))) 307 /* TODO: don't call for ENOENT or ENOTDIR? */ 308 if (pglob->gl_errfunc && 309 (*pglob->gl_errfunc)(pathbuf, errno) || 310 (pglob->gl_flags & GLOB_ERR)) 311 return(GLOB_ABEND); 312 else 313 return(0); 314 315 err = 0; 316 317 /* Search directory for matching names. */ 318 while ((dp = readdir(dirp))) { 319 register char *sc; 320 register Char *dc; 321 322 /* Initial DOT must be matched literally. */ 323 if (dp->d_name[0] == DOT && *pattern != DOT) 324 continue; 325 for (sc = dp->d_name, dc = pathend; 326 *dc++ = (u_char)*sc++;); 327 if (!match(pathend, pattern, restpattern)) { 328 *pathend = EOS; 329 continue; 330 } 331 err = glob2(pathbuf, --dc, restpattern, pglob); 332 if (err) 333 break; 334 } 335 336 /* TODO: check error from readdir? */ 337 (void)closedir(dirp); 338 return(err); 339 } 340 341 342 /* 343 * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 344 * add the new item, and update gl_pathc. 345 * 346 * This assumes the BSD realloc, which only copies the block when its size 347 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 348 * behavior. 349 * 350 * Return 0 if new item added, error code if memory couldn't be allocated. 351 * 352 * Invariant of the glob_t structure: 353 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 354 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 355 */ 356 static int 357 globextend(path, pglob) 358 Char *path; 359 glob_t *pglob; 360 { 361 register char **pathv; 362 register int i; 363 u_int newsize; 364 char *copy; 365 Char *p; 366 367 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 368 pathv = (char **)realloc((char *)pglob->gl_pathv, newsize); 369 if (pathv == NULL) 370 return(GLOB_NOSPACE); 371 372 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 373 /* first time around -- clear initial gl_offs items */ 374 pathv += pglob->gl_offs; 375 for (i = pglob->gl_offs; --i >= 0; ) 376 *--pathv = NULL; 377 } 378 pglob->gl_pathv = pathv; 379 380 for (p = path; *p++;); 381 if ((copy = malloc(p - path)) != NULL) { 382 g_Ctoc(path, copy); 383 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 384 } 385 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 386 return(copy == NULL ? GLOB_NOSPACE : 0); 387 } 388 389 390 /* 391 * pattern matching function for filenames. Each occurrence of the * 392 * pattern causes a recursion level. 393 */ 394 static 395 match(name, pat, patend) 396 register Char *name, *pat, *patend; 397 { 398 int ok, negate_range; 399 Char c, k; 400 401 while (pat < patend) { 402 c = *pat++; 403 switch (c & M_MASK) { 404 case M_ALL: 405 if (pat == patend) 406 return(1); 407 for (; *name != EOS; ++name) 408 if (match(name, pat, patend)) 409 return(1); 410 return(0); 411 case M_ONE: 412 if (*name++ == EOS) 413 return(0); 414 break; 415 case M_SET: 416 ok = 0; 417 k = *name++; 418 if (negate_range = ((*pat & M_MASK) == M_NOT)) 419 ++pat; 420 while (((c = *pat++) & M_MASK) != M_END) 421 if ((*pat & M_MASK) == M_RNG) { 422 if (c <= k && k <= pat[1]) 423 ok = 1; 424 pat += 2; 425 } else if (c == k) 426 ok = 1; 427 if (ok == negate_range) 428 return(0); 429 break; 430 default: 431 if (*name++ != c) 432 return(0); 433 break; 434 } 435 } 436 return(*name == EOS); 437 } 438 439 /* Free allocated data belonging to a glob_t structure. */ 440 void 441 globfree(pglob) 442 glob_t *pglob; 443 { 444 register int i; 445 register char **pp; 446 447 if (pglob->gl_pathv != NULL) { 448 pp = pglob->gl_pathv + pglob->gl_offs; 449 for (i = pglob->gl_pathc; i--; ++pp) 450 if (*pp) 451 free(*pp); 452 free(pglob->gl_pathv); 453 } 454 } 455 456 static DIR * 457 g_opendir(str) 458 register Char *str; 459 { 460 char buf[MAXPATHLEN]; 461 462 if (!*str) 463 return(opendir(".")); 464 g_Ctoc(str, buf); 465 return(opendir(buf)); 466 } 467 468 static int 469 g_lstat(fn, sb) 470 register Char *fn; 471 struct stat *sb; 472 { 473 char buf[MAXPATHLEN]; 474 475 g_Ctoc(fn, buf); 476 return(lstat(buf, sb)); 477 } 478 479 static int 480 g_stat(fn, sb) 481 register Char *fn; 482 struct stat *sb; 483 { 484 char buf[MAXPATHLEN]; 485 486 g_Ctoc(fn, buf); 487 return(stat(buf, sb)); 488 } 489 490 static Char * 491 g_strchr(str, ch) 492 Char *str; 493 int ch; 494 { 495 do { 496 if (*str == ch) 497 return (str); 498 } while (*str++); 499 return (NULL); 500 } 501 502 static void 503 g_Ctoc(str, buf) 504 register Char *str; 505 char *buf; 506 { 507 register char *dc; 508 509 for (dc = buf; *dc++ = *str++;); 510 } 511 512 #ifdef DEBUG 513 static void 514 qprintf(s) 515 register Char *s; 516 { 517 register Char *p; 518 519 for (p = s; *p; p++) 520 (void)printf("%c", *p & 0xff); 521 (void)printf("\n"); 522 for (p = s; *p; p++) 523 (void)printf("%c", *p & M_PROTECT ? '"' : ' '); 524 (void)printf("\n"); 525 for (p = s; *p; p++) 526 (void)printf("%c", *p & M_META ? '_' : ' '); 527 (void)printf("\n"); 528 } 529 #endif 530