1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)glob.c 5.9 (Berkeley) 06/16/91"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 /* 16 * Glob: the interface is a superset of the one defined in POSIX 1003.2, 17 * draft 9. 18 * 19 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 20 * 21 * Optional extra services, controlled by flags not defined by POSIX: 22 * 23 * GLOB_QUOTE: 24 * Escaping convention: \ inhibits any special meaning the following 25 * character might have (except \ at end of string is retained). 26 * GLOB_MAGCHAR: 27 * Set in gl_flags if pattern contained a globbing character. 28 * gl_matchc: 29 * Number of matches in the current invocation of glob. 30 */ 31 32 #include <sys/cdefs.h> 33 #include <sys/param.h> 34 #include <sys/stat.h> 35 #include <dirent.h> 36 #include <glob.h> 37 #include <ctype.h> 38 #include <errno.h> 39 #include <string.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 43 #define DOLLAR '$' 44 #define DOT '.' 45 #define EOS '\0' 46 #define LBRACKET '[' 47 #define NOT '!' 48 #define QUESTION '?' 49 #define QUOTE '\\' 50 #define RANGE '-' 51 #define RBRACKET ']' 52 #define SEP '/' 53 #define STAR '*' 54 #define TILDE '~' 55 #define UNDERSCORE '_' 56 57 #define M_QUOTE 0x8000 58 #define M_PROTECT 0x4000 59 #define M_MASK 0xffff 60 61 #define META(c) ((c)|M_QUOTE) 62 #define M_ALL META('*') 63 #define M_END META(']') 64 #define M_NOT META('!') 65 #define M_ONE META('?') 66 #define M_RNG META('-') 67 #define M_SET META('[') 68 #define ismeta(c) (((c)&M_QUOTE) != 0) 69 70 typedef u_short shortchar_t; 71 72 static int glob1 __P((shortchar_t *, glob_t *)); 73 static int glob2 __P((shortchar_t *, shortchar_t *, shortchar_t *, glob_t *)); 74 static int glob3 __P((shortchar_t *, 75 shortchar_t *, shortchar_t *, shortchar_t *, glob_t *)); 76 static int globextend __P((shortchar_t *, glob_t *)); 77 static int match __P((shortchar_t *, shortchar_t *, shortchar_t *)); 78 static DIR *Opendir __P((shortchar_t *)); 79 static int Lstat __P((shortchar_t *, struct stat *)); 80 static int Stat __P((shortchar_t *, struct stat *)); 81 static shortchar_t *Strchr __P((shortchar_t *, int)); 82 #ifdef DEBUG 83 static void qprintf __P((shortchar_t *)); 84 #endif 85 86 static DIR * 87 Opendir(str) 88 register shortchar_t *str; 89 { 90 register char *dc; 91 char buf[MAXPATHLEN]; 92 93 if (!*str) 94 return(opendir(".")); 95 for (dc = buf; *dc++ = *str++;); 96 return(opendir(buf)); 97 } 98 99 static int 100 Lstat(fn, sb) 101 register shortchar_t *fn; 102 struct stat *sb; 103 { 104 register char *dc; 105 char buf[MAXPATHLEN]; 106 107 for (dc = buf; *dc++ = *fn++;); 108 return(lstat(buf, sb)); 109 } 110 111 static int 112 Stat(fn, sb) 113 register shortchar_t *fn; 114 struct stat *sb; 115 { 116 register char *dc; 117 char buf[MAXPATHLEN]; 118 119 for (dc = buf; *dc++ = *fn++;); 120 return(stat(buf, sb)); 121 } 122 123 static shortchar_t * 124 Strchr(str, ch) 125 shortchar_t *str; 126 int ch; 127 { 128 do { 129 if (*str == ch) 130 return (str); 131 } while (*str++); 132 return (NULL); 133 } 134 135 #ifdef DEBUG 136 static void 137 qprintf(s) 138 shortchar_t *s; 139 { 140 shortchar_t *p; 141 142 for (p = s; *p; p++) 143 printf("%c", *p & 0xff); 144 printf("\n"); 145 for (p = s; *p; p++) 146 printf("%c", *p & M_PROTECT ? '"' : ' '); 147 printf("\n"); 148 for (p = s; *p; p++) 149 printf("%c", *p & M_META ? '_' : ' '); 150 printf("\n"); 151 } 152 #endif 153 154 static int 155 compare(p, q) 156 void **p, **q; 157 { 158 return(strcmp(*(char **)p, *(char **)q)); 159 } 160 161 /* 162 * The main glob() routine: compiles the pattern (optionally processing 163 * quotes), calls glob1() to do the real pattern matching, and finally 164 * sorts the list (unless unsorted operation is requested). Returns 0 165 * if things went well, nonzero if errors occurred. It is not an error 166 * to find no matches. 167 */ 168 glob(pattern, flags, errfunc, pglob) 169 const char *pattern; 170 int flags; 171 int (*errfunc) __P((char *, int)); 172 glob_t *pglob; 173 { 174 int err, oldpathc; 175 shortchar_t *bufnext, *bufend, *compilebuf; 176 const char *compilepat, *patnext; 177 char c; 178 shortchar_t patbuf[MAXPATHLEN+1], *qpatnext; 179 180 patnext = pattern; 181 if (!(flags & GLOB_APPEND)) { 182 pglob->gl_pathc = 0; 183 pglob->gl_pathv = NULL; 184 if (!(flags & GLOB_DOOFFS)) 185 pglob->gl_offs = 0; 186 } 187 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 188 pglob->gl_errfunc = errfunc; 189 oldpathc = pglob->gl_pathc; 190 pglob->gl_matchc = 0; 191 192 bufnext = patbuf; 193 bufend = bufnext + MAXPATHLEN; 194 if (flags & GLOB_QUOTE) { 195 /* Protect the quoted characters */ 196 while (bufnext < bufend && (c = *patnext++) != EOS) 197 if (c == QUOTE) { 198 if ((c = *patnext++) == EOS) { 199 c = QUOTE; 200 --patnext; 201 } 202 *bufnext++ = c | M_PROTECT; 203 } 204 else 205 *bufnext++ = c; 206 } 207 else 208 while (bufnext < bufend && (c = *patnext++) != EOS) 209 *bufnext++ = c; 210 *bufnext = EOS; 211 212 bufnext = patbuf; 213 qpatnext = patbuf; 214 compilebuf = bufnext; 215 compilepat = patnext; 216 /* we don't need to check for buffer overflow any more */ 217 while ((c = *qpatnext++) != EOS) { 218 switch (c) { 219 case LBRACKET: 220 pglob->gl_flags |= GLOB_MAGCHAR; 221 c = *qpatnext; 222 if (c == NOT) 223 ++qpatnext; 224 if (*qpatnext == EOS || 225 Strchr(qpatnext+1, RBRACKET) == NULL) { 226 *bufnext++ = LBRACKET; 227 if (c == NOT) 228 --qpatnext; 229 break; 230 } 231 *bufnext++ = M_SET; 232 if (c == NOT) 233 *bufnext++ = M_NOT; 234 c = *qpatnext++; 235 do { 236 *bufnext++ = c; 237 if (*qpatnext == RANGE && 238 (c = qpatnext[1]) != RBRACKET) { 239 *bufnext++ = M_RNG; 240 *bufnext++ = c; 241 qpatnext += 2; 242 } 243 } while ((c = *qpatnext++) != RBRACKET); 244 *bufnext++ = M_END; 245 break; 246 case QUESTION: 247 pglob->gl_flags |= GLOB_MAGCHAR; 248 *bufnext++ = M_ONE; 249 break; 250 case STAR: 251 pglob->gl_flags |= GLOB_MAGCHAR; 252 *bufnext++ = M_ALL; 253 break; 254 default: 255 *bufnext++ = c; 256 break; 257 } 258 } 259 *bufnext = EOS; 260 #ifdef DEBUG 261 qprintf(patbuf); 262 #endif 263 264 if ((err = glob1(patbuf, pglob)) != 0) 265 return(err); 266 267 if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) { 268 if (!(flags & GLOB_QUOTE)) { 269 shortchar_t *dp = compilebuf; 270 const char *sp = compilepat; 271 while (*dp++ = (u_char)*sp++); 272 } 273 else { 274 /* 275 * copy pattern, interpreting quotes; this is slightly 276 * different than the interpretation of quotes above 277 * -- which should prevail? 278 */ 279 while (*compilepat != EOS) { 280 if (*compilepat == QUOTE) { 281 if (*++compilepat == EOS) 282 --compilepat; 283 } 284 *compilebuf++ = (u_char)*compilepat++; 285 } 286 *compilebuf = EOS; 287 } 288 return(globextend(patbuf, pglob)); 289 } else if (!(flags & GLOB_NOSORT)) 290 qsort((char*)(pglob->gl_pathv + pglob->gl_offs + oldpathc), 291 pglob->gl_pathc - oldpathc, sizeof(char*), compare); 292 return(0); 293 } 294 295 static 296 glob1(pattern, pglob) 297 shortchar_t *pattern; 298 glob_t *pglob; 299 { 300 shortchar_t pathbuf[MAXPATHLEN+1]; 301 302 /* 303 * A null pathname is invalid -- POSIX 1003.1 sect. 2.4. 304 */ 305 if (*pattern == EOS) 306 return(0); 307 return(glob2(pathbuf, pathbuf, pattern, pglob)); 308 } 309 310 /* 311 * functions glob2 and glob3 are mutually recursive; there is one level 312 * of recursion for each segment in the pattern that contains one or 313 * more meta characters. 314 */ 315 static 316 glob2(pathbuf, pathend, pattern, pglob) 317 shortchar_t *pathbuf, *pathend, *pattern; 318 glob_t *pglob; 319 { 320 struct stat sbuf; 321 shortchar_t *p, *q; 322 int anymeta; 323 324 /* 325 * loop over pattern segments until end of pattern or until 326 * segment with meta character found. 327 */ 328 for (anymeta = 0;;) { 329 if (*pattern == EOS) { /* end of pattern? */ 330 *pathend = EOS; 331 if (Lstat(pathbuf, &sbuf)) 332 return(0); 333 334 if (((pglob->gl_flags & GLOB_MARK) && 335 pathend[-1] != SEP) && (S_ISDIR(sbuf.st_mode) 336 || (S_ISLNK(sbuf.st_mode) && 337 (Stat(pathbuf, &sbuf) == 0) && 338 S_ISDIR(sbuf.st_mode)))) { 339 *pathend++ = SEP; 340 *pathend = EOS; 341 } 342 ++pglob->gl_matchc; 343 return(globextend(pathbuf, pglob)); 344 } 345 346 /* find end of next segment, copy tentatively to pathend */ 347 q = pathend; 348 p = pattern; 349 while (*p != EOS && *p != SEP) { 350 if (ismeta(*p)) 351 anymeta = 1; 352 *q++ = *p++; 353 } 354 355 if (!anymeta) { /* no expansion, do next segment */ 356 pathend = q; 357 pattern = p; 358 while (*pattern == SEP) 359 *pathend++ = *pattern++; 360 } else /* need expansion, recurse */ 361 return(glob3(pathbuf, pathend, pattern, p, pglob)); 362 } 363 /* NOTREACHED */ 364 } 365 366 static 367 glob3(pathbuf, pathend, pattern, restpattern, pglob) 368 shortchar_t *pathbuf, *pathend, *pattern, *restpattern; 369 glob_t *pglob; 370 { 371 extern int errno; 372 DIR *dirp; 373 struct dirent *dp; 374 int len, err; 375 376 *pathend = EOS; 377 errno = 0; 378 379 if (!(dirp = Opendir(pathbuf))) 380 /* todo: don't call for ENOENT or ENOTDIR? */ 381 if (pglob->gl_errfunc && 382 (*pglob->gl_errfunc)(pathbuf, errno) || 383 (pglob->gl_flags & GLOB_ERR)) 384 return(GLOB_ABEND); 385 else 386 return(0); 387 388 err = 0; 389 390 /* search directory for matching names */ 391 while ((dp = readdir(dirp))) { 392 register char *sc; 393 register shortchar_t *dc; 394 /* initial DOT must be matched literally */ 395 if (dp->d_name[0] == DOT && *pattern != DOT) 396 continue; 397 for (sc = dp->d_name, dc = pathend; 398 *dc++ = (u_char)*sc++;); 399 if (!match(pathend, pattern, restpattern)) { 400 *pathend = EOS; 401 continue; 402 } 403 err = glob2(pathbuf, --dc, restpattern, pglob); 404 if (err) 405 break; 406 } 407 /* todo: check error from readdir? */ 408 (void)closedir(dirp); 409 return(err); 410 } 411 412 413 /* 414 * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 415 * add the new item, and update gl_pathc. 416 * 417 * This assumes the BSD realloc, which only copies the block when its size 418 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 419 * behavior. 420 * 421 * Return 0 if new item added, error code if memory couldn't be allocated. 422 * 423 * Invariant of the glob_t structure: 424 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 425 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 426 */ 427 static int 428 globextend(path, pglob) 429 shortchar_t *path; 430 glob_t *pglob; 431 { 432 register char **pathv; 433 register int i; 434 u_int newsize; 435 char *copy; 436 shortchar_t *p; 437 438 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 439 pathv = (char **)realloc((char *)pglob->gl_pathv, newsize); 440 if (pathv == NULL) 441 return(GLOB_NOSPACE); 442 443 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 444 /* first time around -- clear initial gl_offs items */ 445 pathv += pglob->gl_offs; 446 for (i = pglob->gl_offs; --i >= 0; ) 447 *--pathv = NULL; 448 } 449 pglob->gl_pathv = pathv; 450 451 for (p = path; *p++;); 452 if ((copy = malloc(p - path)) != NULL) { 453 register char *dc; 454 register shortchar_t *sc; 455 456 for (dc = copy, sc = path; *dc++ = *sc++;); 457 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 458 } 459 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 460 return((copy == NULL) ? GLOB_NOSPACE : 0); 461 } 462 463 464 /* 465 * pattern matching function for filenames. Each occurrence of the * 466 * pattern causes a recursion level. 467 */ 468 static 469 match(name, pat, patend) 470 register shortchar_t *name, *pat, *patend; 471 { 472 int ok, negate_range; 473 shortchar_t c, k; 474 475 while (pat < patend) { 476 c = *pat++; 477 switch (c & M_MASK) { 478 case M_ALL: 479 if (pat == patend) 480 return(1); 481 for (; *name != EOS; ++name) { 482 if (match(name, pat, patend)) 483 return(1); 484 } 485 return(0); 486 case M_ONE: 487 if (*name++ == EOS) 488 return(0); 489 break; 490 case M_SET: 491 ok = 0; 492 k = *name++; 493 if (negate_range = ((*pat & M_MASK) == M_NOT)) 494 ++pat; 495 while (((c = *pat++) & M_MASK) != M_END) { 496 if ((*pat & M_MASK) == M_RNG) { 497 if (c <= k && k <= pat[1]) 498 ok = 1; 499 pat += 2; 500 } 501 else if (c == k) 502 ok = 1; 503 } 504 if (ok == negate_range) 505 return(0); 506 break; 507 default: 508 if (*name++ != c) 509 return(0); 510 break; 511 } 512 } 513 return(*name == EOS); 514 } 515 516 /* free allocated data belonging to a glob_t structure */ 517 void 518 globfree(pglob) 519 glob_t *pglob; 520 { 521 register int i; 522 register char **pp; 523 524 if (pglob->gl_pathv != NULL) { 525 pp = pglob->gl_pathv + pglob->gl_offs; 526 for (i = pglob->gl_pathc; i--; ++pp) 527 if (*pp) 528 free(pp); 529 free(pglob->gl_pathv); 530 } 531 } 532