1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #if defined(LIBC_SCCS) && !defined(lint) 38 static char sccsid[] = "@(#)glob.c 5.12 (Berkeley) 6/24/91"; 39 #endif /* LIBC_SCCS and not lint */ 40 41 /* 42 * glob(3) -- a superset of the one defined in POSIX 1003.2. 43 * 44 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 45 * 46 * Optional extra services, controlled by flags not defined by POSIX: 47 * 48 * GLOB_QUOTE: 49 * Escaping convention: \ inhibits any special meaning the following 50 * character might have (except \ at end of string is retained). 51 * GLOB_MAGCHAR: 52 * Set in gl_flags if pattern contained a globbing character. 53 * gl_matchc: 54 * Number of matches in the current invocation of glob. 55 */ 56 57 #include <sys/cdefs.h> 58 #include <sys/param.h> 59 #include <sys/stat.h> 60 #include <dirent.h> 61 #include <glob.h> 62 #include <ctype.h> 63 #include <errno.h> 64 #include <string.h> 65 #include <stdio.h> 66 #include <stdlib.h> 67 68 #define DOLLAR '$' 69 #define DOT '.' 70 #define EOS '\0' 71 #define LBRACKET '[' 72 #define NOT '!' 73 #define QUESTION '?' 74 #define QUOTE '\\' 75 #define RANGE '-' 76 #define RBRACKET ']' 77 #define SEP '/' 78 #define STAR '*' 79 #define TILDE '~' 80 #define UNDERSCORE '_' 81 82 #define M_QUOTE 0x8000 83 #define M_PROTECT 0x4000 84 #define M_MASK 0xffff 85 #define M_ASCII 0x00ff 86 87 #define CHAR(c) ((c)&M_ASCII) 88 #define META(c) ((c)|M_QUOTE) 89 #define M_ALL META('*') 90 #define M_END META(']') 91 #define M_NOT META('!') 92 #define M_ONE META('?') 93 #define M_RNG META('-') 94 #define M_SET META('[') 95 #define ismeta(c) (((c)&M_QUOTE) != 0) 96 97 typedef u_short Char; 98 99 static int compare __P((const void *, const void *)); 100 static void g_Ctoc __P((Char *, char *)); 101 static int g_lstat __P((Char *, struct stat *)); 102 static DIR *g_opendir __P((Char *)); 103 static Char *g_strchr __P((Char *, int)); 104 static int g_stat __P((Char *, struct stat *)); 105 static int glob1 __P((Char *, glob_t *)); 106 static int glob2 __P((Char *, Char *, Char *, glob_t *)); 107 static int glob3 __P((Char *, Char *, Char *, Char *, glob_t *)); 108 static int globextend __P((Char *, glob_t *)); 109 static int match __P((Char *, Char *, Char *)); 110 #ifdef DEBUG 111 static void qprintf __P((Char *)); 112 #endif 113 114 /* 115 * The main glob() routine: compiles the pattern (optionally processing 116 * quotes), calls glob1() to do the real pattern matching, and finally 117 * sorts the list (unless unsorted operation is requested). Returns 0 118 * if things went well, nonzero if errors occurred. It is not an error 119 * to find no matches. 120 */ 121 glob(pattern, flags, errfunc, pglob) 122 const char *pattern; 123 int flags, (*errfunc) __P((char *, int)); 124 glob_t *pglob; 125 { 126 const u_char *compilepat, *patnext; 127 int c, err, oldpathc; 128 Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1]; 129 130 patnext = (u_char *) pattern; 131 if (!(flags & GLOB_APPEND)) { 132 pglob->gl_pathc = 0; 133 pglob->gl_pathv = NULL; 134 if (!(flags & GLOB_DOOFFS)) 135 pglob->gl_offs = 0; 136 } 137 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 138 pglob->gl_errfunc = errfunc; 139 oldpathc = pglob->gl_pathc; 140 pglob->gl_matchc = 0; 141 142 bufnext = patbuf; 143 bufend = bufnext + MAXPATHLEN; 144 compilebuf = bufnext; 145 compilepat = patnext; 146 if (flags & GLOB_QUOTE) { 147 /* Protect the quoted characters. */ 148 while (bufnext < bufend && (c = *patnext++) != EOS) 149 if (c == QUOTE) { 150 if ((c = *patnext++) == EOS) { 151 c = QUOTE; 152 --patnext; 153 } 154 *bufnext++ = c | M_PROTECT; 155 } 156 else 157 *bufnext++ = c; 158 } 159 else 160 while (bufnext < bufend && (c = *patnext++) != EOS) 161 *bufnext++ = c; 162 *bufnext = EOS; 163 164 bufnext = patbuf; 165 qpatnext = patbuf; 166 /* We don't need to check for buffer overflow any more. */ 167 while ((c = *qpatnext++) != EOS) { 168 switch (c) { 169 case LBRACKET: 170 pglob->gl_flags |= GLOB_MAGCHAR; 171 c = *qpatnext; 172 if (c == NOT) 173 ++qpatnext; 174 if (*qpatnext == EOS || 175 g_strchr(qpatnext+1, RBRACKET) == NULL) { 176 *bufnext++ = LBRACKET; 177 if (c == NOT) 178 --qpatnext; 179 break; 180 } 181 *bufnext++ = M_SET; 182 if (c == NOT) 183 *bufnext++ = M_NOT; 184 c = *qpatnext++; 185 do { 186 *bufnext++ = CHAR(c); 187 if (*qpatnext == RANGE && 188 (c = qpatnext[1]) != RBRACKET) { 189 *bufnext++ = M_RNG; 190 *bufnext++ = CHAR(c); 191 qpatnext += 2; 192 } 193 } while ((c = *qpatnext++) != RBRACKET); 194 *bufnext++ = M_END; 195 break; 196 case QUESTION: 197 pglob->gl_flags |= GLOB_MAGCHAR; 198 *bufnext++ = M_ONE; 199 break; 200 case STAR: 201 pglob->gl_flags |= GLOB_MAGCHAR; 202 *bufnext++ = M_ALL; 203 break; 204 default: 205 *bufnext++ = CHAR(c); 206 break; 207 } 208 } 209 *bufnext = EOS; 210 #ifdef DEBUG 211 qprintf(patbuf); 212 #endif 213 214 if ((err = glob1(patbuf, pglob)) != 0) 215 return(err); 216 217 if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) { 218 if (!(flags & GLOB_QUOTE)) { 219 Char *dp = compilebuf; 220 const u_char *sp = compilepat; 221 while (*dp++ = *sp++); 222 } 223 else { 224 /* 225 * Copy pattern, interpreting quotes; this is slightly 226 * different than the interpretation of quotes above 227 * -- which should prevail? 228 */ 229 while (*compilepat != EOS) { 230 if (*compilepat == QUOTE) { 231 if (*++compilepat == EOS) 232 --compilepat; 233 } 234 *compilebuf++ = (u_char)*compilepat++; 235 } 236 *compilebuf = EOS; 237 } 238 return(globextend(patbuf, pglob)); 239 } else if (!(flags & GLOB_NOSORT)) 240 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, 241 pglob->gl_pathc - oldpathc, sizeof(char *), compare); 242 return(0); 243 } 244 245 static int 246 compare(p, q) 247 const void *p, *q; 248 { 249 return(strcmp(*(char **)p, *(char **)q)); 250 } 251 252 static 253 glob1(pattern, pglob) 254 Char *pattern; 255 glob_t *pglob; 256 { 257 Char pathbuf[MAXPATHLEN+1]; 258 259 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ 260 if (*pattern == EOS) 261 return(0); 262 return(glob2(pathbuf, pathbuf, pattern, pglob)); 263 } 264 265 /* 266 * The functions glob2 and glob3 are mutually recursive; there is one level 267 * of recursion for each segment in the pattern that contains one or more 268 * meta characters. 269 */ 270 static 271 glob2(pathbuf, pathend, pattern, pglob) 272 Char *pathbuf, *pathend, *pattern; 273 glob_t *pglob; 274 { 275 struct stat sb; 276 Char *p, *q; 277 int anymeta; 278 279 /* 280 * Loop over pattern segments until end of pattern or until 281 * segment with meta character found. 282 */ 283 for (anymeta = 0;;) { 284 if (*pattern == EOS) { /* End of pattern? */ 285 *pathend = EOS; 286 if (g_stat(pathbuf, &sb)) 287 return(0); 288 289 if (((pglob->gl_flags & GLOB_MARK) && 290 pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) 291 || (S_ISLNK(sb.st_mode) && 292 (g_stat(pathbuf, &sb) == 0) && 293 S_ISDIR(sb.st_mode)))) { 294 *pathend++ = SEP; 295 *pathend = EOS; 296 } 297 ++pglob->gl_matchc; 298 return(globextend(pathbuf, pglob)); 299 } 300 301 /* Find end of next segment, copy tentatively to pathend. */ 302 q = pathend; 303 p = pattern; 304 while (*p != EOS && *p != SEP) { 305 if (ismeta(*p)) 306 anymeta = 1; 307 *q++ = *p++; 308 } 309 310 if (!anymeta) { /* No expansion, do next segment. */ 311 pathend = q; 312 pattern = p; 313 while (*pattern == SEP) 314 *pathend++ = *pattern++; 315 } else /* Need expansion, recurse. */ 316 return(glob3(pathbuf, pathend, pattern, p, pglob)); 317 } 318 /* NOTREACHED */ 319 } 320 321 static 322 glob3(pathbuf, pathend, pattern, restpattern, pglob) 323 Char *pathbuf, *pathend, *pattern, *restpattern; 324 glob_t *pglob; 325 { 326 register struct dirent *dp; 327 DIR *dirp; 328 int len, err; 329 330 *pathend = EOS; 331 errno = 0; 332 333 if (!(dirp = g_opendir(pathbuf))) 334 /* TODO: don't call for ENOENT or ENOTDIR? */ 335 if (pglob->gl_errfunc && 336 (*pglob->gl_errfunc)(pathbuf, errno) || 337 (pglob->gl_flags & GLOB_ERR)) 338 return(GLOB_ABEND); 339 else 340 return(0); 341 342 err = 0; 343 344 /* Search directory for matching names. */ 345 while ((dp = readdir(dirp))) { 346 register u_char *sc; 347 register Char *dc; 348 349 /* Initial DOT must be matched literally. */ 350 if (dp->d_name[0] == DOT && *pattern != DOT) 351 continue; 352 for (sc = (u_char *) dp->d_name, dc = pathend; 353 *dc++ = *sc++;); 354 if (!match(pathend, pattern, restpattern)) { 355 *pathend = EOS; 356 continue; 357 } 358 err = glob2(pathbuf, --dc, restpattern, pglob); 359 if (err) 360 break; 361 } 362 363 /* TODO: check error from readdir? */ 364 (void)closedir(dirp); 365 return(err); 366 } 367 368 369 /* 370 * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 371 * add the new item, and update gl_pathc. 372 * 373 * This assumes the BSD realloc, which only copies the block when its size 374 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 375 * behavior. 376 * 377 * Return 0 if new item added, error code if memory couldn't be allocated. 378 * 379 * Invariant of the glob_t structure: 380 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 381 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 382 */ 383 static int 384 globextend(path, pglob) 385 Char *path; 386 glob_t *pglob; 387 { 388 register char **pathv; 389 register int i; 390 u_int newsize; 391 char *copy; 392 Char *p; 393 394 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 395 pathv = (char **)realloc((char *)pglob->gl_pathv, newsize); 396 if (pathv == NULL) 397 return(GLOB_NOSPACE); 398 399 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 400 /* first time around -- clear initial gl_offs items */ 401 pathv += pglob->gl_offs; 402 for (i = pglob->gl_offs; --i >= 0; ) 403 *--pathv = NULL; 404 } 405 pglob->gl_pathv = pathv; 406 407 for (p = path; *p++;); 408 if ((copy = malloc(p - path)) != NULL) { 409 g_Ctoc(path, copy); 410 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 411 } 412 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 413 return(copy == NULL ? GLOB_NOSPACE : 0); 414 } 415 416 417 /* 418 * pattern matching function for filenames. Each occurrence of the * 419 * pattern causes a recursion level. 420 */ 421 static 422 match(name, pat, patend) 423 register Char *name, *pat, *patend; 424 { 425 int ok, negate_range; 426 Char c, k; 427 428 while (pat < patend) { 429 c = *pat++; 430 switch (c & M_MASK) { 431 case M_ALL: 432 if (pat == patend) 433 return(1); 434 for (; *name != EOS; ++name) 435 if (match(name, pat, patend)) 436 return(1); 437 return(0); 438 case M_ONE: 439 if (*name++ == EOS) 440 return(0); 441 break; 442 case M_SET: 443 ok = 0; 444 k = *name++; 445 if (negate_range = ((*pat & M_MASK) == M_NOT)) 446 ++pat; 447 while (((c = *pat++) & M_MASK) != M_END) 448 if ((*pat & M_MASK) == M_RNG) { 449 if (c <= k && k <= pat[1]) 450 ok = 1; 451 pat += 2; 452 } else if (c == k) 453 ok = 1; 454 if (ok == negate_range) 455 return(0); 456 break; 457 default: 458 if (*name++ != c) 459 return(0); 460 break; 461 } 462 } 463 return(*name == EOS); 464 } 465 466 /* Free allocated data belonging to a glob_t structure. */ 467 void 468 globfree(pglob) 469 glob_t *pglob; 470 { 471 register int i; 472 register char **pp; 473 474 if (pglob->gl_pathv != NULL) { 475 pp = pglob->gl_pathv + pglob->gl_offs; 476 for (i = pglob->gl_pathc; i--; ++pp) 477 if (*pp) 478 free(*pp); 479 free(pglob->gl_pathv); 480 } 481 } 482 483 static DIR * 484 g_opendir(str) 485 register Char *str; 486 { 487 char buf[MAXPATHLEN]; 488 489 if (!*str) 490 return(opendir(".")); 491 g_Ctoc(str, buf); 492 return(opendir(buf)); 493 } 494 495 static int 496 g_lstat(fn, sb) 497 register Char *fn; 498 struct stat *sb; 499 { 500 char buf[MAXPATHLEN]; 501 502 g_Ctoc(fn, buf); 503 return(lstat(buf, sb)); 504 } 505 506 static int 507 g_stat(fn, sb) 508 register Char *fn; 509 struct stat *sb; 510 { 511 char buf[MAXPATHLEN]; 512 513 g_Ctoc(fn, buf); 514 return(stat(buf, sb)); 515 } 516 517 static Char * 518 g_strchr(str, ch) 519 Char *str; 520 int ch; 521 { 522 do { 523 if (*str == ch) 524 return (str); 525 } while (*str++); 526 return (NULL); 527 } 528 529 static void 530 g_Ctoc(str, buf) 531 register Char *str; 532 char *buf; 533 { 534 register char *dc; 535 536 for (dc = buf; *dc++ = *str++;); 537 } 538 539 #ifdef DEBUG 540 static void 541 qprintf(s) 542 register Char *s; 543 { 544 register Char *p; 545 546 for (p = s; *p; p++) 547 (void)printf("%c", *p & 0xff); 548 (void)printf("\n"); 549 for (p = s; *p; p++) 550 (void)printf("%c", *p & M_PROTECT ? '"' : ' '); 551 (void)printf("\n"); 552 for (p = s; *p; p++) 553 (void)printf("%c", *p & M_META ? '_' : ' '); 554 (void)printf("\n"); 555 } 556 #endif 557