140087Sbostic /* 240087Sbostic * Copyright (c) 1989 The Regents of the University of California. 340087Sbostic * All rights reserved. 440087Sbostic * 540087Sbostic * This code is derived from software contributed to Berkeley by 640087Sbostic * Guido van Rossum. 740087Sbostic * 842625Sbostic * %sccs.include.redist.c% 940087Sbostic */ 1040087Sbostic 1140087Sbostic #if defined(LIBC_SCCS) && !defined(lint) 12*50121Sbostic static char sccsid[] = "@(#)glob.c 5.12 (Berkeley) 06/24/91"; 1340087Sbostic #endif /* LIBC_SCCS and not lint */ 1440087Sbostic 1540087Sbostic /* 1650050Sbostic * glob(3) -- a superset of the one defined in POSIX 1003.2. 1740087Sbostic * 1840087Sbostic * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 1940087Sbostic * 2040087Sbostic * Optional extra services, controlled by flags not defined by POSIX: 2147590Sbostic * 2247590Sbostic * GLOB_QUOTE: 2347590Sbostic * Escaping convention: \ inhibits any special meaning the following 2447590Sbostic * character might have (except \ at end of string is retained). 2547590Sbostic * GLOB_MAGCHAR: 2648540Sbostic * Set in gl_flags if pattern contained a globbing character. 2747590Sbostic * gl_matchc: 2847590Sbostic * Number of matches in the current invocation of glob. 2940087Sbostic */ 3040087Sbostic 3148540Sbostic #include <sys/cdefs.h> 3240087Sbostic #include <sys/param.h> 3340087Sbostic #include <sys/stat.h> 3440087Sbostic #include <dirent.h> 3540087Sbostic #include <glob.h> 3640087Sbostic #include <ctype.h> 3740087Sbostic #include <errno.h> 3840087Sbostic #include <string.h> 3940087Sbostic #include <stdio.h> 4046597Sdonn #include <stdlib.h> 4140087Sbostic 4240087Sbostic #define DOLLAR '$' 4340087Sbostic #define DOT '.' 4440087Sbostic #define EOS '\0' 4540087Sbostic #define LBRACKET '[' 4640087Sbostic #define NOT '!' 4740087Sbostic #define QUESTION '?' 4840087Sbostic #define QUOTE '\\' 4940087Sbostic #define RANGE '-' 5040087Sbostic #define RBRACKET ']' 5140087Sbostic #define SEP '/' 5240087Sbostic #define STAR '*' 5340087Sbostic #define TILDE '~' 5440087Sbostic #define UNDERSCORE '_' 5540087Sbostic 5650049Sbostic #define M_QUOTE 0x8000 5750049Sbostic #define M_PROTECT 0x4000 5848540Sbostic #define M_MASK 0xffff 59*50121Sbostic #define M_ASCII 0x00ff 6048540Sbostic 61*50121Sbostic #define CHAR(c) ((c)&M_ASCII) 6250049Sbostic #define META(c) ((c)|M_QUOTE) 6340087Sbostic #define M_ALL META('*') 6440087Sbostic #define M_END META(']') 6540087Sbostic #define M_NOT META('!') 6640087Sbostic #define M_ONE META('?') 6740087Sbostic #define M_RNG META('-') 6840087Sbostic #define M_SET META('[') 6950049Sbostic #define ismeta(c) (((c)&M_QUOTE) != 0) 7040087Sbostic 7150050Sbostic typedef u_short Char; 7250049Sbostic 7350050Sbostic static int compare __P((const void *, const void *)); 7450050Sbostic static void g_Ctoc __P((Char *, char *)); 7550050Sbostic static int g_lstat __P((Char *, struct stat *)); 7650050Sbostic static DIR *g_opendir __P((Char *)); 7750050Sbostic static Char *g_strchr __P((Char *, int)); 7850050Sbostic static int g_stat __P((Char *, struct stat *)); 7950050Sbostic static int glob1 __P((Char *, glob_t *)); 8050050Sbostic static int glob2 __P((Char *, Char *, Char *, glob_t *)); 8150050Sbostic static int glob3 __P((Char *, Char *, Char *, Char *, glob_t *)); 8250050Sbostic static int globextend __P((Char *, glob_t *)); 8350050Sbostic static int match __P((Char *, Char *, Char *)); 8450049Sbostic #ifdef DEBUG 8550050Sbostic static void qprintf __P((Char *)); 8650049Sbostic #endif 8750049Sbostic 8840087Sbostic /* 8940087Sbostic * The main glob() routine: compiles the pattern (optionally processing 9040087Sbostic * quotes), calls glob1() to do the real pattern matching, and finally 9140087Sbostic * sorts the list (unless unsorted operation is requested). Returns 0 9240087Sbostic * if things went well, nonzero if errors occurred. It is not an error 9340087Sbostic * to find no matches. 9440087Sbostic */ 9540087Sbostic glob(pattern, flags, errfunc, pglob) 9646597Sdonn const char *pattern; 9750050Sbostic int flags, (*errfunc) __P((char *, int)); 9840087Sbostic glob_t *pglob; 9940087Sbostic { 100*50121Sbostic const u_char *compilepat, *patnext; 10150117Sbostic int c, err, oldpathc; 10250050Sbostic Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1]; 10340087Sbostic 104*50121Sbostic patnext = (u_char *) pattern; 10540087Sbostic if (!(flags & GLOB_APPEND)) { 10640087Sbostic pglob->gl_pathc = 0; 10740087Sbostic pglob->gl_pathv = NULL; 10840087Sbostic if (!(flags & GLOB_DOOFFS)) 10940087Sbostic pglob->gl_offs = 0; 11040087Sbostic } 11147590Sbostic pglob->gl_flags = flags & ~GLOB_MAGCHAR; 11240087Sbostic pglob->gl_errfunc = errfunc; 11340087Sbostic oldpathc = pglob->gl_pathc; 11447590Sbostic pglob->gl_matchc = 0; 11540087Sbostic 11640087Sbostic bufnext = patbuf; 11750049Sbostic bufend = bufnext + MAXPATHLEN; 11850050Sbostic compilebuf = bufnext; 11950050Sbostic compilepat = patnext; 12050049Sbostic if (flags & GLOB_QUOTE) { 12150050Sbostic /* Protect the quoted characters. */ 12250049Sbostic while (bufnext < bufend && (c = *patnext++) != EOS) 12350049Sbostic if (c == QUOTE) { 12450049Sbostic if ((c = *patnext++) == EOS) { 12550049Sbostic c = QUOTE; 12650049Sbostic --patnext; 12750049Sbostic } 12850049Sbostic *bufnext++ = c | M_PROTECT; 12950049Sbostic } 13050049Sbostic else 13150049Sbostic *bufnext++ = c; 13250049Sbostic } 13350049Sbostic else 13450049Sbostic while (bufnext < bufend && (c = *patnext++) != EOS) 13550049Sbostic *bufnext++ = c; 13650049Sbostic *bufnext = EOS; 13740087Sbostic 13850049Sbostic bufnext = patbuf; 13950049Sbostic qpatnext = patbuf; 14050050Sbostic /* We don't need to check for buffer overflow any more. */ 14150049Sbostic while ((c = *qpatnext++) != EOS) { 14240087Sbostic switch (c) { 14340087Sbostic case LBRACKET: 14447590Sbostic pglob->gl_flags |= GLOB_MAGCHAR; 14550049Sbostic c = *qpatnext; 14640087Sbostic if (c == NOT) 14750049Sbostic ++qpatnext; 14850049Sbostic if (*qpatnext == EOS || 14950050Sbostic g_strchr(qpatnext+1, RBRACKET) == NULL) { 15040087Sbostic *bufnext++ = LBRACKET; 15140087Sbostic if (c == NOT) 15250049Sbostic --qpatnext; 15340087Sbostic break; 15440087Sbostic } 15540087Sbostic *bufnext++ = M_SET; 15640087Sbostic if (c == NOT) 15740087Sbostic *bufnext++ = M_NOT; 15850049Sbostic c = *qpatnext++; 15940087Sbostic do { 160*50121Sbostic *bufnext++ = CHAR(c); 16150049Sbostic if (*qpatnext == RANGE && 16250049Sbostic (c = qpatnext[1]) != RBRACKET) { 16340087Sbostic *bufnext++ = M_RNG; 164*50121Sbostic *bufnext++ = CHAR(c); 16550049Sbostic qpatnext += 2; 16640087Sbostic } 16750049Sbostic } while ((c = *qpatnext++) != RBRACKET); 16840087Sbostic *bufnext++ = M_END; 16940087Sbostic break; 17040087Sbostic case QUESTION: 17147590Sbostic pglob->gl_flags |= GLOB_MAGCHAR; 17240087Sbostic *bufnext++ = M_ONE; 17340087Sbostic break; 17440087Sbostic case STAR: 17547590Sbostic pglob->gl_flags |= GLOB_MAGCHAR; 17640087Sbostic *bufnext++ = M_ALL; 17740087Sbostic break; 17840087Sbostic default: 179*50121Sbostic *bufnext++ = CHAR(c); 18040087Sbostic break; 18140087Sbostic } 18240087Sbostic } 18340087Sbostic *bufnext = EOS; 18450049Sbostic #ifdef DEBUG 18550049Sbostic qprintf(patbuf); 18650049Sbostic #endif 18740087Sbostic 18840087Sbostic if ((err = glob1(patbuf, pglob)) != 0) 18940087Sbostic return(err); 19040087Sbostic 19140087Sbostic if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) { 19248540Sbostic if (!(flags & GLOB_QUOTE)) { 19350050Sbostic Char *dp = compilebuf; 194*50121Sbostic const u_char *sp = compilepat; 195*50121Sbostic while (*dp++ = *sp++); 19648540Sbostic } 19740087Sbostic else { 19840087Sbostic /* 19950050Sbostic * Copy pattern, interpreting quotes; this is slightly 20040087Sbostic * different than the interpretation of quotes above 20140087Sbostic * -- which should prevail? 20240087Sbostic */ 20340087Sbostic while (*compilepat != EOS) { 20440087Sbostic if (*compilepat == QUOTE) { 20540087Sbostic if (*++compilepat == EOS) 20640087Sbostic --compilepat; 20740087Sbostic } 20848540Sbostic *compilebuf++ = (u_char)*compilepat++; 20940087Sbostic } 21040087Sbostic *compilebuf = EOS; 21140087Sbostic } 21240087Sbostic return(globextend(patbuf, pglob)); 21348540Sbostic } else if (!(flags & GLOB_NOSORT)) 21450050Sbostic qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, 21550050Sbostic pglob->gl_pathc - oldpathc, sizeof(char *), compare); 21640087Sbostic return(0); 21740087Sbostic } 21840087Sbostic 21950050Sbostic static int 22050050Sbostic compare(p, q) 22150050Sbostic const void *p, *q; 22250050Sbostic { 22350050Sbostic return(strcmp(*(char **)p, *(char **)q)); 22450050Sbostic } 22550050Sbostic 22640087Sbostic static 22740087Sbostic glob1(pattern, pglob) 22850050Sbostic Char *pattern; 22940087Sbostic glob_t *pglob; 23040087Sbostic { 23150050Sbostic Char pathbuf[MAXPATHLEN+1]; 23240087Sbostic 23350050Sbostic /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ 23440087Sbostic if (*pattern == EOS) 23540087Sbostic return(0); 23640087Sbostic return(glob2(pathbuf, pathbuf, pattern, pglob)); 23740087Sbostic } 23840087Sbostic 23940087Sbostic /* 24050050Sbostic * The functions glob2 and glob3 are mutually recursive; there is one level 24150050Sbostic * of recursion for each segment in the pattern that contains one or more 24250050Sbostic * meta characters. 24340087Sbostic */ 24440087Sbostic static 24540087Sbostic glob2(pathbuf, pathend, pattern, pglob) 24650050Sbostic Char *pathbuf, *pathend, *pattern; 24740087Sbostic glob_t *pglob; 24840087Sbostic { 24950050Sbostic struct stat sb; 25050050Sbostic Char *p, *q; 25148540Sbostic int anymeta; 25240087Sbostic 25340087Sbostic /* 25450050Sbostic * Loop over pattern segments until end of pattern or until 25540087Sbostic * segment with meta character found. 25640087Sbostic */ 25748357Sbostic for (anymeta = 0;;) { 25850050Sbostic if (*pattern == EOS) { /* End of pattern? */ 25940087Sbostic *pathend = EOS; 26050050Sbostic if (g_stat(pathbuf, &sb)) 26148357Sbostic return(0); 26248540Sbostic 26348540Sbostic if (((pglob->gl_flags & GLOB_MARK) && 26450050Sbostic pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) 26550050Sbostic || (S_ISLNK(sb.st_mode) && 26650050Sbostic (g_stat(pathbuf, &sb) == 0) && 26750050Sbostic S_ISDIR(sb.st_mode)))) { 26840087Sbostic *pathend++ = SEP; 26940087Sbostic *pathend = EOS; 27040087Sbostic } 27147590Sbostic ++pglob->gl_matchc; 27240087Sbostic return(globextend(pathbuf, pglob)); 27340087Sbostic } 27440087Sbostic 27550050Sbostic /* Find end of next segment, copy tentatively to pathend. */ 27640087Sbostic q = pathend; 27740087Sbostic p = pattern; 27840087Sbostic while (*p != EOS && *p != SEP) { 27940087Sbostic if (ismeta(*p)) 28040087Sbostic anymeta = 1; 28140087Sbostic *q++ = *p++; 28240087Sbostic } 28340087Sbostic 28450050Sbostic if (!anymeta) { /* No expansion, do next segment. */ 28540087Sbostic pathend = q; 28640087Sbostic pattern = p; 28740087Sbostic while (*pattern == SEP) 28840087Sbostic *pathend++ = *pattern++; 28950050Sbostic } else /* Need expansion, recurse. */ 29040087Sbostic return(glob3(pathbuf, pathend, pattern, p, pglob)); 29140087Sbostic } 29240087Sbostic /* NOTREACHED */ 29340087Sbostic } 29440087Sbostic 29540087Sbostic static 29640087Sbostic glob3(pathbuf, pathend, pattern, restpattern, pglob) 29750050Sbostic Char *pathbuf, *pathend, *pattern, *restpattern; 29840087Sbostic glob_t *pglob; 29940087Sbostic { 30050050Sbostic register struct dirent *dp; 30140087Sbostic DIR *dirp; 30240087Sbostic int len, err; 30340087Sbostic 30440087Sbostic *pathend = EOS; 30540087Sbostic errno = 0; 30648540Sbostic 30750050Sbostic if (!(dirp = g_opendir(pathbuf))) 30850050Sbostic /* TODO: don't call for ENOENT or ENOTDIR? */ 30940087Sbostic if (pglob->gl_errfunc && 31040087Sbostic (*pglob->gl_errfunc)(pathbuf, errno) || 31140087Sbostic (pglob->gl_flags & GLOB_ERR)) 31240087Sbostic return(GLOB_ABEND); 31340087Sbostic else 31440087Sbostic return(0); 31540087Sbostic 31640087Sbostic err = 0; 31740087Sbostic 31850050Sbostic /* Search directory for matching names. */ 31940087Sbostic while ((dp = readdir(dirp))) { 320*50121Sbostic register u_char *sc; 32150050Sbostic register Char *dc; 32250050Sbostic 32350050Sbostic /* Initial DOT must be matched literally. */ 32440087Sbostic if (dp->d_name[0] == DOT && *pattern != DOT) 32540087Sbostic continue; 326*50121Sbostic for (sc = (u_char *) dp->d_name, dc = pathend; 327*50121Sbostic *dc++ = *sc++;); 32848540Sbostic if (!match(pathend, pattern, restpattern)) { 32948540Sbostic *pathend = EOS; 33040087Sbostic continue; 33148540Sbostic } 33248540Sbostic err = glob2(pathbuf, --dc, restpattern, pglob); 33340087Sbostic if (err) 33440087Sbostic break; 33540087Sbostic } 33650050Sbostic 33750050Sbostic /* TODO: check error from readdir? */ 33840087Sbostic (void)closedir(dirp); 33940087Sbostic return(err); 34040087Sbostic } 34140087Sbostic 34240087Sbostic 34340087Sbostic /* 34440087Sbostic * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 34540087Sbostic * add the new item, and update gl_pathc. 34640087Sbostic * 34740087Sbostic * This assumes the BSD realloc, which only copies the block when its size 34840087Sbostic * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 34940087Sbostic * behavior. 35040087Sbostic * 35140087Sbostic * Return 0 if new item added, error code if memory couldn't be allocated. 35240087Sbostic * 35340087Sbostic * Invariant of the glob_t structure: 35440087Sbostic * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 35550050Sbostic * gl_pathv points to (gl_offs + gl_pathc + 1) items. 35640087Sbostic */ 35748540Sbostic static int 35840087Sbostic globextend(path, pglob) 35950050Sbostic Char *path; 36040087Sbostic glob_t *pglob; 36140087Sbostic { 36240087Sbostic register char **pathv; 36340087Sbostic register int i; 36448540Sbostic u_int newsize; 36540087Sbostic char *copy; 36650050Sbostic Char *p; 36740087Sbostic 36840087Sbostic newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 36948540Sbostic pathv = (char **)realloc((char *)pglob->gl_pathv, newsize); 37040087Sbostic if (pathv == NULL) 37140087Sbostic return(GLOB_NOSPACE); 37240087Sbostic 37340087Sbostic if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 37440087Sbostic /* first time around -- clear initial gl_offs items */ 37540087Sbostic pathv += pglob->gl_offs; 37640087Sbostic for (i = pglob->gl_offs; --i >= 0; ) 37740087Sbostic *--pathv = NULL; 37840087Sbostic } 37940087Sbostic pglob->gl_pathv = pathv; 38040087Sbostic 38148540Sbostic for (p = path; *p++;); 38248540Sbostic if ((copy = malloc(p - path)) != NULL) { 38350050Sbostic g_Ctoc(path, copy); 38440087Sbostic pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 38540087Sbostic } 38640087Sbostic pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 38750050Sbostic return(copy == NULL ? GLOB_NOSPACE : 0); 38840087Sbostic } 38940087Sbostic 39040087Sbostic 39140087Sbostic /* 39240087Sbostic * pattern matching function for filenames. Each occurrence of the * 39340087Sbostic * pattern causes a recursion level. 39440087Sbostic */ 39548540Sbostic static 39640087Sbostic match(name, pat, patend) 39750050Sbostic register Char *name, *pat, *patend; 39840087Sbostic { 39948540Sbostic int ok, negate_range; 40050050Sbostic Char c, k; 40140087Sbostic 40240087Sbostic while (pat < patend) { 40340087Sbostic c = *pat++; 40448540Sbostic switch (c & M_MASK) { 40540087Sbostic case M_ALL: 40640087Sbostic if (pat == patend) 40740087Sbostic return(1); 40850050Sbostic for (; *name != EOS; ++name) 40940087Sbostic if (match(name, pat, patend)) 41040087Sbostic return(1); 41140087Sbostic return(0); 41240087Sbostic case M_ONE: 41340087Sbostic if (*name++ == EOS) 41440087Sbostic return(0); 41540087Sbostic break; 41640087Sbostic case M_SET: 41740087Sbostic ok = 0; 41840087Sbostic k = *name++; 41948540Sbostic if (negate_range = ((*pat & M_MASK) == M_NOT)) 42040087Sbostic ++pat; 42150050Sbostic while (((c = *pat++) & M_MASK) != M_END) 42248540Sbostic if ((*pat & M_MASK) == M_RNG) { 42340087Sbostic if (c <= k && k <= pat[1]) 42440087Sbostic ok = 1; 42540087Sbostic pat += 2; 42650050Sbostic } else if (c == k) 42740087Sbostic ok = 1; 42840087Sbostic if (ok == negate_range) 42940087Sbostic return(0); 43040087Sbostic break; 43140087Sbostic default: 43240087Sbostic if (*name++ != c) 43340087Sbostic return(0); 43440087Sbostic break; 43540087Sbostic } 43640087Sbostic } 43740087Sbostic return(*name == EOS); 43840087Sbostic } 43940087Sbostic 44050050Sbostic /* Free allocated data belonging to a glob_t structure. */ 44140087Sbostic void 44240087Sbostic globfree(pglob) 44340087Sbostic glob_t *pglob; 44440087Sbostic { 44540087Sbostic register int i; 44640087Sbostic register char **pp; 44740087Sbostic 44840087Sbostic if (pglob->gl_pathv != NULL) { 44940087Sbostic pp = pglob->gl_pathv + pglob->gl_offs; 45040087Sbostic for (i = pglob->gl_pathc; i--; ++pp) 45140087Sbostic if (*pp) 45250050Sbostic free(*pp); 45350049Sbostic free(pglob->gl_pathv); 45440087Sbostic } 45540087Sbostic } 45650050Sbostic 45750050Sbostic static DIR * 45850050Sbostic g_opendir(str) 45950050Sbostic register Char *str; 46050050Sbostic { 46150050Sbostic char buf[MAXPATHLEN]; 46250050Sbostic 46350050Sbostic if (!*str) 46450050Sbostic return(opendir(".")); 46550050Sbostic g_Ctoc(str, buf); 46650050Sbostic return(opendir(buf)); 46750050Sbostic } 46850050Sbostic 46950050Sbostic static int 47050050Sbostic g_lstat(fn, sb) 47150050Sbostic register Char *fn; 47250050Sbostic struct stat *sb; 47350050Sbostic { 47450050Sbostic char buf[MAXPATHLEN]; 47550050Sbostic 47650050Sbostic g_Ctoc(fn, buf); 47750050Sbostic return(lstat(buf, sb)); 47850050Sbostic } 47950050Sbostic 48050050Sbostic static int 48150050Sbostic g_stat(fn, sb) 48250050Sbostic register Char *fn; 48350050Sbostic struct stat *sb; 48450050Sbostic { 48550050Sbostic char buf[MAXPATHLEN]; 48650050Sbostic 48750050Sbostic g_Ctoc(fn, buf); 48850050Sbostic return(stat(buf, sb)); 48950050Sbostic } 49050050Sbostic 49150050Sbostic static Char * 49250050Sbostic g_strchr(str, ch) 49350050Sbostic Char *str; 49450050Sbostic int ch; 49550050Sbostic { 49650050Sbostic do { 49750050Sbostic if (*str == ch) 49850050Sbostic return (str); 49950050Sbostic } while (*str++); 50050050Sbostic return (NULL); 50150050Sbostic } 50250050Sbostic 50350050Sbostic static void 50450050Sbostic g_Ctoc(str, buf) 50550050Sbostic register Char *str; 50650050Sbostic char *buf; 50750050Sbostic { 50850050Sbostic register char *dc; 50950050Sbostic 51050050Sbostic for (dc = buf; *dc++ = *str++;); 51150050Sbostic } 51250050Sbostic 51350050Sbostic #ifdef DEBUG 51450050Sbostic static void 51550050Sbostic qprintf(s) 51650050Sbostic register Char *s; 51750050Sbostic { 51850050Sbostic register Char *p; 51950050Sbostic 52050050Sbostic for (p = s; *p; p++) 52150050Sbostic (void)printf("%c", *p & 0xff); 52250050Sbostic (void)printf("\n"); 52350050Sbostic for (p = s; *p; p++) 52450050Sbostic (void)printf("%c", *p & M_PROTECT ? '"' : ' '); 52550050Sbostic (void)printf("\n"); 52650050Sbostic for (p = s; *p; p++) 52750050Sbostic (void)printf("%c", *p & M_META ? '_' : ' '); 52850050Sbostic (void)printf("\n"); 52950050Sbostic } 53050050Sbostic #endif 531