140087Sbostic /* 240087Sbostic * Copyright (c) 1989 The Regents of the University of California. 340087Sbostic * All rights reserved. 440087Sbostic * 540087Sbostic * This code is derived from software contributed to Berkeley by 640087Sbostic * Guido van Rossum. 740087Sbostic * 842625Sbostic * %sccs.include.redist.c% 940087Sbostic */ 1040087Sbostic 1140087Sbostic #if defined(LIBC_SCCS) && !defined(lint) 12*48540Sbostic static char sccsid[] = "@(#)glob.c 5.8 (Berkeley) 04/23/91"; 1340087Sbostic #endif /* LIBC_SCCS and not lint */ 1440087Sbostic 1540087Sbostic /* 1640087Sbostic * Glob: the interface is a superset of the one defined in POSIX 1003.2, 1740087Sbostic * draft 9. 1840087Sbostic * 1940087Sbostic * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 2040087Sbostic * 2140087Sbostic * Optional extra services, controlled by flags not defined by POSIX: 2247590Sbostic * 2347590Sbostic * GLOB_QUOTE: 2447590Sbostic * Escaping convention: \ inhibits any special meaning the following 2547590Sbostic * character might have (except \ at end of string is retained). 2647590Sbostic * GLOB_MAGCHAR: 27*48540Sbostic * Set in gl_flags if pattern contained a globbing character. 2847590Sbostic * gl_matchc: 2947590Sbostic * Number of matches in the current invocation of glob. 3040087Sbostic */ 3140087Sbostic 32*48540Sbostic #include <sys/cdefs.h> 3340087Sbostic #include <sys/param.h> 3440087Sbostic #include <sys/stat.h> 3540087Sbostic #include <dirent.h> 3640087Sbostic #include <glob.h> 3740087Sbostic #include <ctype.h> 3840087Sbostic #include <errno.h> 3940087Sbostic #include <string.h> 4040087Sbostic #include <stdio.h> 4146597Sdonn #include <stdlib.h> 4240087Sbostic 43*48540Sbostic static int glob1(), glob2(), glob3(), globextend(), match(); 4440087Sbostic 4540087Sbostic #define DOLLAR '$' 4640087Sbostic #define DOT '.' 4740087Sbostic #define EOS '\0' 4840087Sbostic #define LBRACKET '[' 4940087Sbostic #define NOT '!' 5040087Sbostic #define QUESTION '?' 5140087Sbostic #define QUOTE '\\' 5240087Sbostic #define RANGE '-' 5340087Sbostic #define RBRACKET ']' 5440087Sbostic #define SEP '/' 5540087Sbostic #define STAR '*' 5640087Sbostic #define TILDE '~' 5740087Sbostic #define UNDERSCORE '_' 5840087Sbostic 59*48540Sbostic #define SHORT_STRINGS 60*48540Sbostic #ifdef SHORT_STRINGS 61*48540Sbostic typedef u_short shortchar_t; 62*48540Sbostic #define METABIT 0x8000 63*48540Sbostic #define M_MASK 0xffff 64*48540Sbostic #else 65*48540Sbostic typedef char shortchar_t; 6640087Sbostic #define METABIT 0x80 67*48540Sbostic #define M_MASK 0xff 68*48540Sbostic #endif 69*48540Sbostic 7040087Sbostic #define META(c) ((c)|METABIT) 7140087Sbostic #define M_ALL META('*') 7240087Sbostic #define M_END META(']') 7340087Sbostic #define M_NOT META('!') 7440087Sbostic #define M_ONE META('?') 7540087Sbostic #define M_RNG META('-') 7640087Sbostic #define M_SET META('[') 7740087Sbostic #define ismeta(c) (((c)&METABIT) != 0) 7840087Sbostic 79*48540Sbostic #ifdef SHORT_STRINGS 80*48540Sbostic static DIR * 81*48540Sbostic Opendir(str) 82*48540Sbostic register shortchar_t *str; 83*48540Sbostic { 84*48540Sbostic register char *dc; 85*48540Sbostic char buf[MAXPATHLEN]; 86*48540Sbostic 87*48540Sbostic if (!*str) 88*48540Sbostic return(opendir(".")); 89*48540Sbostic for (dc = buf; *dc++ = *str++;); 90*48540Sbostic return(opendir(buf)); 91*48540Sbostic } 92*48540Sbostic 93*48540Sbostic static int 94*48540Sbostic Lstat(fn, sb) 95*48540Sbostic register shortchar_t *fn; 96*48540Sbostic struct stat *sb; 97*48540Sbostic { 98*48540Sbostic register char *dc; 99*48540Sbostic char buf[MAXPATHLEN]; 100*48540Sbostic 101*48540Sbostic for (dc = buf; *dc++ = *fn++;); 102*48540Sbostic return(lstat(buf, sb)); 103*48540Sbostic } 104*48540Sbostic 105*48540Sbostic static int 106*48540Sbostic Stat(fn, sb) 107*48540Sbostic register shortchar_t *fn; 108*48540Sbostic struct stat *sb; 109*48540Sbostic { 110*48540Sbostic register char *dc; 111*48540Sbostic char buf[MAXPATHLEN]; 112*48540Sbostic 113*48540Sbostic for (dc = buf; *dc++ = *fn++;); 114*48540Sbostic return(stat(buf, sb)); 115*48540Sbostic } 116*48540Sbostic #else 117*48540Sbostic #define Opendir opendir 118*48540Sbostic #define Stat stat 119*48540Sbostic #define Lstat lstat 120*48540Sbostic #endif 121*48540Sbostic 122*48540Sbostic static int 12340087Sbostic compare(p, q) 12443531Sbostic void **p, **q; 12540087Sbostic { 12643531Sbostic return(strcmp(*(char **)p, *(char **)q)); 12740087Sbostic } 12840087Sbostic 12940087Sbostic /* 13040087Sbostic * The main glob() routine: compiles the pattern (optionally processing 13140087Sbostic * quotes), calls glob1() to do the real pattern matching, and finally 13240087Sbostic * sorts the list (unless unsorted operation is requested). Returns 0 13340087Sbostic * if things went well, nonzero if errors occurred. It is not an error 13440087Sbostic * to find no matches. 13540087Sbostic */ 13640087Sbostic glob(pattern, flags, errfunc, pglob) 13746597Sdonn const char *pattern; 13846597Sdonn int flags; 13946597Sdonn int (*errfunc) __P((char *, int)); 14040087Sbostic glob_t *pglob; 14140087Sbostic { 14240087Sbostic int err, oldpathc; 143*48540Sbostic shortchar_t *bufnext, *bufend, *compilebuf; 14446597Sdonn const char *compilepat, *patnext; 145*48540Sbostic char c; 146*48540Sbostic shortchar_t patbuf[MAXPATHLEN+1]; 14740087Sbostic 14840087Sbostic patnext = pattern; 14940087Sbostic if (!(flags & GLOB_APPEND)) { 15040087Sbostic pglob->gl_pathc = 0; 15140087Sbostic pglob->gl_pathv = NULL; 15240087Sbostic if (!(flags & GLOB_DOOFFS)) 15340087Sbostic pglob->gl_offs = 0; 15440087Sbostic } 15547590Sbostic pglob->gl_flags = flags & ~GLOB_MAGCHAR; 15640087Sbostic pglob->gl_errfunc = errfunc; 15740087Sbostic oldpathc = pglob->gl_pathc; 15847590Sbostic pglob->gl_matchc = 0; 15940087Sbostic 16040087Sbostic bufnext = patbuf; 16140087Sbostic bufend = bufnext+MAXPATHLEN; 16240087Sbostic 16340087Sbostic compilebuf = bufnext; 16440087Sbostic compilepat = patnext; 16540087Sbostic while (bufnext < bufend && (c = *patnext++) != EOS) { 16640087Sbostic switch (c) { 16740087Sbostic case LBRACKET: 16847590Sbostic pglob->gl_flags |= GLOB_MAGCHAR; 16940087Sbostic c = *patnext; 17040087Sbostic if (c == NOT) 17140087Sbostic ++patnext; 17240087Sbostic if (*patnext == EOS || 17340087Sbostic strchr(patnext+1, RBRACKET) == NULL) { 17440087Sbostic *bufnext++ = LBRACKET; 17540087Sbostic if (c == NOT) 17640087Sbostic --patnext; 17740087Sbostic break; 17840087Sbostic } 17940087Sbostic *bufnext++ = M_SET; 18040087Sbostic if (c == NOT) 18140087Sbostic *bufnext++ = M_NOT; 18240087Sbostic c = *patnext++; 18340087Sbostic do { 18440087Sbostic /* todo: quoting */ 18540087Sbostic *bufnext++ = c; 18640087Sbostic if (*patnext == RANGE && 18740087Sbostic (c = patnext[1]) != RBRACKET) { 18840087Sbostic *bufnext++ = M_RNG; 18940087Sbostic *bufnext++ = c; 19040087Sbostic patnext += 2; 19140087Sbostic } 19240087Sbostic } while ((c = *patnext++) != RBRACKET); 19340087Sbostic *bufnext++ = M_END; 19440087Sbostic break; 19540087Sbostic case QUESTION: 19647590Sbostic pglob->gl_flags |= GLOB_MAGCHAR; 19740087Sbostic *bufnext++ = M_ONE; 19840087Sbostic break; 19940087Sbostic case QUOTE: 20040087Sbostic if (!(flags & GLOB_QUOTE)) 20140087Sbostic *bufnext++ = QUOTE; 20240087Sbostic else { 20340087Sbostic if ((c = *patnext++) == EOS) { 20440087Sbostic c = QUOTE; 20540087Sbostic --patnext; 20640087Sbostic } 20740087Sbostic *bufnext++ = c; 20840087Sbostic } 20940087Sbostic break; 21040087Sbostic case STAR: 21147590Sbostic pglob->gl_flags |= GLOB_MAGCHAR; 21240087Sbostic *bufnext++ = M_ALL; 21340087Sbostic break; 21440087Sbostic default: 21540087Sbostic *bufnext++ = c; 21640087Sbostic break; 21740087Sbostic } 21840087Sbostic } 21940087Sbostic *bufnext = EOS; 22040087Sbostic 22140087Sbostic if ((err = glob1(patbuf, pglob)) != 0) 22240087Sbostic return(err); 22340087Sbostic 22440087Sbostic if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) { 225*48540Sbostic if (!(flags & GLOB_QUOTE)) { 226*48540Sbostic shortchar_t *dp = compilebuf; 227*48540Sbostic const char *sp = compilepat; 228*48540Sbostic while (*dp++ = (u_char)*sp++); 229*48540Sbostic } 23040087Sbostic else { 23140087Sbostic /* 23240087Sbostic * copy pattern, interpreting quotes; this is slightly 23340087Sbostic * different than the interpretation of quotes above 23440087Sbostic * -- which should prevail? 23540087Sbostic */ 23640087Sbostic while (*compilepat != EOS) { 23740087Sbostic if (*compilepat == QUOTE) { 23840087Sbostic if (*++compilepat == EOS) 23940087Sbostic --compilepat; 24040087Sbostic } 241*48540Sbostic *compilebuf++ = (u_char)*compilepat++; 24240087Sbostic } 24340087Sbostic *compilebuf = EOS; 24440087Sbostic } 24540087Sbostic return(globextend(patbuf, pglob)); 246*48540Sbostic } else if (!(flags & GLOB_NOSORT)) 247*48540Sbostic qsort((char*)(pglob->gl_pathv + pglob->gl_offs + oldpathc), 24840087Sbostic pglob->gl_pathc - oldpathc, sizeof(char*), compare); 24940087Sbostic return(0); 25040087Sbostic } 25140087Sbostic 25240087Sbostic static 25340087Sbostic glob1(pattern, pglob) 254*48540Sbostic shortchar_t *pattern; 25540087Sbostic glob_t *pglob; 25640087Sbostic { 257*48540Sbostic shortchar_t pathbuf[MAXPATHLEN+1]; 25840087Sbostic 25940087Sbostic /* 260*48540Sbostic * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. 261*48540Sbostic */ 26240087Sbostic if (*pattern == EOS) 26340087Sbostic return(0); 26440087Sbostic return(glob2(pathbuf, pathbuf, pattern, pglob)); 26540087Sbostic } 26640087Sbostic 26740087Sbostic /* 26840087Sbostic * functions glob2 and glob3 are mutually recursive; there is one level 26940087Sbostic * of recursion for each segment in the pattern that contains one or 27040087Sbostic * more meta characters. 27140087Sbostic */ 27240087Sbostic static 27340087Sbostic glob2(pathbuf, pathend, pattern, pglob) 274*48540Sbostic shortchar_t *pathbuf, *pathend, *pattern; 27540087Sbostic glob_t *pglob; 27640087Sbostic { 277*48540Sbostic struct stat sbuf; 278*48540Sbostic shortchar_t *p, *q; 279*48540Sbostic int anymeta; 28040087Sbostic 28140087Sbostic /* 28240087Sbostic * loop over pattern segments until end of pattern or until 28340087Sbostic * segment with meta character found. 28440087Sbostic */ 28548357Sbostic for (anymeta = 0;;) { 28640087Sbostic if (*pattern == EOS) { /* end of pattern? */ 28740087Sbostic *pathend = EOS; 288*48540Sbostic if (Lstat(pathbuf, &sbuf)) 28948357Sbostic return(0); 290*48540Sbostic 291*48540Sbostic if (((pglob->gl_flags & GLOB_MARK) && 292*48540Sbostic pathend[-1] != SEP) && (S_ISDIR(sbuf.st_mode) 293*48540Sbostic || (S_ISLNK(sbuf.st_mode) && 294*48540Sbostic (Stat(pathbuf, &sbuf) == 0) && 295*48540Sbostic S_ISDIR(sbuf.st_mode)))) { 29640087Sbostic *pathend++ = SEP; 29740087Sbostic *pathend = EOS; 29840087Sbostic } 29947590Sbostic ++pglob->gl_matchc; 30040087Sbostic return(globextend(pathbuf, pglob)); 30140087Sbostic } 30240087Sbostic 30340087Sbostic /* find end of next segment, copy tentatively to pathend */ 30440087Sbostic q = pathend; 30540087Sbostic p = pattern; 30640087Sbostic while (*p != EOS && *p != SEP) { 30740087Sbostic if (ismeta(*p)) 30840087Sbostic anymeta = 1; 30940087Sbostic *q++ = *p++; 31040087Sbostic } 31140087Sbostic 31240087Sbostic if (!anymeta) { /* no expansion, do next segment */ 31340087Sbostic pathend = q; 31440087Sbostic pattern = p; 31540087Sbostic while (*pattern == SEP) 31640087Sbostic *pathend++ = *pattern++; 31740087Sbostic } else /* need expansion, recurse */ 31840087Sbostic return(glob3(pathbuf, pathend, pattern, p, pglob)); 31940087Sbostic } 32040087Sbostic /* NOTREACHED */ 32140087Sbostic } 32240087Sbostic 323*48540Sbostic 32440087Sbostic static 32540087Sbostic glob3(pathbuf, pathend, pattern, restpattern, pglob) 326*48540Sbostic shortchar_t *pathbuf, *pathend, *pattern, *restpattern; 32740087Sbostic glob_t *pglob; 32840087Sbostic { 32940087Sbostic extern int errno; 33040087Sbostic DIR *dirp; 33140087Sbostic struct dirent *dp; 33240087Sbostic int len, err; 33340087Sbostic 33440087Sbostic *pathend = EOS; 33540087Sbostic errno = 0; 336*48540Sbostic 337*48540Sbostic if (!(dirp = Opendir(pathbuf))) 33840087Sbostic /* todo: don't call for ENOENT or ENOTDIR? */ 33940087Sbostic if (pglob->gl_errfunc && 34040087Sbostic (*pglob->gl_errfunc)(pathbuf, errno) || 34140087Sbostic (pglob->gl_flags & GLOB_ERR)) 34240087Sbostic return(GLOB_ABEND); 34340087Sbostic else 34440087Sbostic return(0); 34540087Sbostic 34640087Sbostic err = 0; 34740087Sbostic 34840087Sbostic /* search directory for matching names */ 34940087Sbostic while ((dp = readdir(dirp))) { 350*48540Sbostic register char *sc; 351*48540Sbostic register shortchar_t *dc; 35240087Sbostic /* initial DOT must be matched literally */ 35340087Sbostic if (dp->d_name[0] == DOT && *pattern != DOT) 35440087Sbostic continue; 355*48540Sbostic for (sc = dp->d_name, dc = pathend; 356*48540Sbostic *dc++ = (u_char)*sc++;); 357*48540Sbostic if (!match(pathend, pattern, restpattern)) { 358*48540Sbostic *pathend = EOS; 35940087Sbostic continue; 360*48540Sbostic } 361*48540Sbostic err = glob2(pathbuf, --dc, restpattern, pglob); 36240087Sbostic if (err) 36340087Sbostic break; 36440087Sbostic } 36540087Sbostic /* todo: check error from readdir? */ 36640087Sbostic (void)closedir(dirp); 36740087Sbostic return(err); 36840087Sbostic } 36940087Sbostic 37040087Sbostic 37140087Sbostic /* 37240087Sbostic * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 37340087Sbostic * add the new item, and update gl_pathc. 37440087Sbostic * 37540087Sbostic * This assumes the BSD realloc, which only copies the block when its size 37640087Sbostic * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 37740087Sbostic * behavior. 37840087Sbostic * 37940087Sbostic * Return 0 if new item added, error code if memory couldn't be allocated. 38040087Sbostic * 38140087Sbostic * Invariant of the glob_t structure: 38240087Sbostic * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 38340087Sbostic * gl_pathv points to (gl_offs + gl_pathc + 1) items. 38440087Sbostic */ 385*48540Sbostic static int 38640087Sbostic globextend(path, pglob) 387*48540Sbostic shortchar_t *path; 38840087Sbostic glob_t *pglob; 38940087Sbostic { 39040087Sbostic register char **pathv; 39140087Sbostic register int i; 392*48540Sbostic u_int newsize; 39340087Sbostic char *copy; 394*48540Sbostic shortchar_t *p; 39540087Sbostic 39640087Sbostic newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 397*48540Sbostic pathv = (char **)realloc((char *)pglob->gl_pathv, newsize); 39840087Sbostic if (pathv == NULL) 39940087Sbostic return(GLOB_NOSPACE); 40040087Sbostic 40140087Sbostic if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 40240087Sbostic /* first time around -- clear initial gl_offs items */ 40340087Sbostic pathv += pglob->gl_offs; 40440087Sbostic for (i = pglob->gl_offs; --i >= 0; ) 40540087Sbostic *--pathv = NULL; 40640087Sbostic } 40740087Sbostic pglob->gl_pathv = pathv; 40840087Sbostic 409*48540Sbostic for (p = path; *p++;); 410*48540Sbostic if ((copy = malloc(p - path)) != NULL) { 411*48540Sbostic register char *dc = copy; 412*48540Sbostic register shortchar_t *sc = path; 413*48540Sbostic while (*dc++ = *sc++); 41440087Sbostic pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 41540087Sbostic } 41640087Sbostic pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 41740087Sbostic return((copy == NULL) ? GLOB_NOSPACE : 0); 41840087Sbostic } 41940087Sbostic 42040087Sbostic 42140087Sbostic /* 42240087Sbostic * pattern matching function for filenames. Each occurrence of the * 42340087Sbostic * pattern causes a recursion level. 42440087Sbostic */ 425*48540Sbostic static 42640087Sbostic match(name, pat, patend) 427*48540Sbostic register shortchar_t *name, *pat, *patend; 42840087Sbostic { 429*48540Sbostic int ok, negate_range; 430*48540Sbostic shortchar_t c, k; 43140087Sbostic 43240087Sbostic while (pat < patend) { 43340087Sbostic c = *pat++; 434*48540Sbostic switch (c & M_MASK) { 43540087Sbostic case M_ALL: 43640087Sbostic if (pat == patend) 43740087Sbostic return(1); 43840087Sbostic for (; *name != EOS; ++name) { 43940087Sbostic if (match(name, pat, patend)) 44040087Sbostic return(1); 44140087Sbostic } 44240087Sbostic return(0); 44340087Sbostic case M_ONE: 44440087Sbostic if (*name++ == EOS) 44540087Sbostic return(0); 44640087Sbostic break; 44740087Sbostic case M_SET: 44840087Sbostic ok = 0; 44940087Sbostic k = *name++; 450*48540Sbostic if (negate_range = ((*pat & M_MASK) == M_NOT)) 45140087Sbostic ++pat; 452*48540Sbostic while (((c = *pat++) & M_MASK) != M_END) { 453*48540Sbostic if ((*pat & M_MASK) == M_RNG) { 45440087Sbostic if (c <= k && k <= pat[1]) 45540087Sbostic ok = 1; 45640087Sbostic pat += 2; 45740087Sbostic } 45840087Sbostic else if (c == k) 45940087Sbostic ok = 1; 46040087Sbostic } 46140087Sbostic if (ok == negate_range) 46240087Sbostic return(0); 46340087Sbostic break; 46440087Sbostic default: 46540087Sbostic if (*name++ != c) 46640087Sbostic return(0); 46740087Sbostic break; 46840087Sbostic } 46940087Sbostic } 47040087Sbostic return(*name == EOS); 47140087Sbostic } 47240087Sbostic 47340087Sbostic /* free allocated data belonging to a glob_t structure */ 47440087Sbostic void 47540087Sbostic globfree(pglob) 47640087Sbostic glob_t *pglob; 47740087Sbostic { 47840087Sbostic register int i; 47940087Sbostic register char **pp; 48040087Sbostic 48140087Sbostic if (pglob->gl_pathv != NULL) { 48240087Sbostic pp = pglob->gl_pathv + pglob->gl_offs; 48340087Sbostic for (i = pglob->gl_pathc; i--; ++pp) 48440087Sbostic if (*pp) 48547736Sbostic free((void *)*pp); 48647736Sbostic free((void *)pglob->gl_pathv); 48740087Sbostic } 48840087Sbostic } 489