140087Sbostic /* 240087Sbostic * Copyright (c) 1989 The Regents of the University of California. 340087Sbostic * All rights reserved. 440087Sbostic * 540087Sbostic * This code is derived from software contributed to Berkeley by 640087Sbostic * Guido van Rossum. 740087Sbostic * 8*42625Sbostic * %sccs.include.redist.c% 940087Sbostic */ 1040087Sbostic 1140087Sbostic #if defined(LIBC_SCCS) && !defined(lint) 12*42625Sbostic static char sccsid[] = "@(#)glob.c 5.2 (Berkeley) 06/01/90"; 1340087Sbostic #endif /* LIBC_SCCS and not lint */ 1440087Sbostic 1540087Sbostic /* 1640087Sbostic * Glob: the interface is a superset of the one defined in POSIX 1003.2, 1740087Sbostic * draft 9. 1840087Sbostic * 1940087Sbostic * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 2040087Sbostic * 2140087Sbostic * Optional extra services, controlled by flags not defined by POSIX: 2240087Sbostic * GLOB_QUOTE: escaping convention: \ inhibits any special meaning 2340087Sbostic the following character might have (except \ at end of 2440087Sbostic * string is kept); 2540087Sbostic */ 2640087Sbostic 2740087Sbostic #include <sys/param.h> 2840087Sbostic #include <sys/stat.h> 2940087Sbostic #include <dirent.h> 3040087Sbostic #include <glob.h> 3140087Sbostic #include <ctype.h> 3240087Sbostic #include <errno.h> 3340087Sbostic #include <string.h> 3440087Sbostic #include <stdio.h> 3540087Sbostic 3640087Sbostic char *malloc(), *realloc(); 3740087Sbostic 3840087Sbostic typedef int bool_t; 3940087Sbostic 4040087Sbostic #define DOLLAR '$' 4140087Sbostic #define DOT '.' 4240087Sbostic #define EOS '\0' 4340087Sbostic #define LBRACKET '[' 4440087Sbostic #define NOT '!' 4540087Sbostic #define QUESTION '?' 4640087Sbostic #define QUOTE '\\' 4740087Sbostic #define RANGE '-' 4840087Sbostic #define RBRACKET ']' 4940087Sbostic #define SEP '/' 5040087Sbostic #define STAR '*' 5140087Sbostic #define TILDE '~' 5240087Sbostic #define UNDERSCORE '_' 5340087Sbostic 5440087Sbostic #define METABIT 0x80 5540087Sbostic #define META(c) ((c)|METABIT) 5640087Sbostic #define M_ALL META('*') 5740087Sbostic #define M_END META(']') 5840087Sbostic #define M_NOT META('!') 5940087Sbostic #define M_ONE META('?') 6040087Sbostic #define M_RNG META('-') 6140087Sbostic #define M_SET META('[') 6240087Sbostic #define ismeta(c) (((c)&METABIT) != 0) 6340087Sbostic 6440087Sbostic static 6540087Sbostic compare(p, q) 6640087Sbostic char **p, **q; 6740087Sbostic { 6840087Sbostic return(strcmp(*p, *q)); 6940087Sbostic } 7040087Sbostic 7140087Sbostic 7240087Sbostic /* 7340087Sbostic * The main glob() routine: compiles the pattern (optionally processing 7440087Sbostic * quotes), calls glob1() to do the real pattern matching, and finally 7540087Sbostic * sorts the list (unless unsorted operation is requested). Returns 0 7640087Sbostic * if things went well, nonzero if errors occurred. It is not an error 7740087Sbostic * to find no matches. 7840087Sbostic */ 7940087Sbostic glob(pattern, flags, errfunc, pglob) 8040087Sbostic char *pattern; 8140087Sbostic int flags, (*errfunc)(); 8240087Sbostic glob_t *pglob; 8340087Sbostic { 8440087Sbostic int err, oldpathc; 8540087Sbostic char *bufnext, *bufend, *compilebuf, *compilepat, *patnext; 8640087Sbostic char c, patbuf[MAXPATHLEN+1]; 8740087Sbostic 8840087Sbostic patnext = pattern; 8940087Sbostic if (!(flags & GLOB_APPEND)) { 9040087Sbostic pglob->gl_pathc = 0; 9140087Sbostic pglob->gl_pathv = NULL; 9240087Sbostic if (!(flags & GLOB_DOOFFS)) 9340087Sbostic pglob->gl_offs = 0; 9440087Sbostic } 9540087Sbostic pglob->gl_flags = flags; 9640087Sbostic pglob->gl_errfunc = errfunc; 9740087Sbostic oldpathc = pglob->gl_pathc; 9840087Sbostic 9940087Sbostic bufnext = patbuf; 10040087Sbostic bufend = bufnext+MAXPATHLEN; 10140087Sbostic 10240087Sbostic compilebuf = bufnext; 10340087Sbostic compilepat = patnext; 10440087Sbostic while (bufnext < bufend && (c = *patnext++) != EOS) { 10540087Sbostic switch (c) { 10640087Sbostic case LBRACKET: 10740087Sbostic c = *patnext; 10840087Sbostic if (c == NOT) 10940087Sbostic ++patnext; 11040087Sbostic if (*patnext == EOS || 11140087Sbostic strchr(patnext+1, RBRACKET) == NULL) { 11240087Sbostic *bufnext++ = LBRACKET; 11340087Sbostic if (c == NOT) 11440087Sbostic --patnext; 11540087Sbostic break; 11640087Sbostic } 11740087Sbostic *bufnext++ = M_SET; 11840087Sbostic if (c == NOT) 11940087Sbostic *bufnext++ = M_NOT; 12040087Sbostic c = *patnext++; 12140087Sbostic do { 12240087Sbostic /* todo: quoting */ 12340087Sbostic *bufnext++ = c; 12440087Sbostic if (*patnext == RANGE && 12540087Sbostic (c = patnext[1]) != RBRACKET) { 12640087Sbostic *bufnext++ = M_RNG; 12740087Sbostic *bufnext++ = c; 12840087Sbostic patnext += 2; 12940087Sbostic } 13040087Sbostic } while ((c = *patnext++) != RBRACKET); 13140087Sbostic *bufnext++ = M_END; 13240087Sbostic break; 13340087Sbostic case QUESTION: 13440087Sbostic *bufnext++ = M_ONE; 13540087Sbostic break; 13640087Sbostic case QUOTE: 13740087Sbostic if (!(flags & GLOB_QUOTE)) 13840087Sbostic *bufnext++ = QUOTE; 13940087Sbostic else { 14040087Sbostic if ((c = *patnext++) == EOS) { 14140087Sbostic c = QUOTE; 14240087Sbostic --patnext; 14340087Sbostic } 14440087Sbostic *bufnext++ = c; 14540087Sbostic } 14640087Sbostic break; 14740087Sbostic case STAR: 14840087Sbostic *bufnext++ = M_ALL; 14940087Sbostic break; 15040087Sbostic default: 15140087Sbostic *bufnext++ = c; 15240087Sbostic break; 15340087Sbostic } 15440087Sbostic } 15540087Sbostic *bufnext = EOS; 15640087Sbostic 15740087Sbostic if ((err = glob1(patbuf, pglob)) != 0) 15840087Sbostic return(err); 15940087Sbostic 16040087Sbostic if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) { 16140087Sbostic if (!(flags & GLOB_QUOTE)) 16240087Sbostic (void)strcpy(compilebuf, compilepat); 16340087Sbostic else { 16440087Sbostic /* 16540087Sbostic * copy pattern, interpreting quotes; this is slightly 16640087Sbostic * different than the interpretation of quotes above 16740087Sbostic * -- which should prevail? 16840087Sbostic */ 16940087Sbostic while (*compilepat != EOS) { 17040087Sbostic if (*compilepat == QUOTE) { 17140087Sbostic if (*++compilepat == EOS) 17240087Sbostic --compilepat; 17340087Sbostic } 17440087Sbostic *compilebuf++ = *compilepat++; 17540087Sbostic } 17640087Sbostic *compilebuf = EOS; 17740087Sbostic } 17840087Sbostic return(globextend(patbuf, pglob)); 17940087Sbostic } else if (!(flags & GLOB_NOSORT)) 18040087Sbostic qsort((char*) (pglob->gl_pathv + pglob->gl_offs + oldpathc), 18140087Sbostic pglob->gl_pathc - oldpathc, sizeof(char*), compare); 18240087Sbostic return(0); 18340087Sbostic } 18440087Sbostic 18540087Sbostic static 18640087Sbostic glob1(pattern, pglob) 18740087Sbostic char *pattern; 18840087Sbostic glob_t *pglob; 18940087Sbostic { 19040087Sbostic char pathbuf[MAXPATHLEN+1]; 19140087Sbostic 19240087Sbostic /* 19340087Sbostic * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ 19440087Sbostic if (*pattern == EOS) 19540087Sbostic return(0); 19640087Sbostic return(glob2(pathbuf, pathbuf, pattern, pglob)); 19740087Sbostic } 19840087Sbostic 19940087Sbostic /* 20040087Sbostic * functions glob2 and glob3 are mutually recursive; there is one level 20140087Sbostic * of recursion for each segment in the pattern that contains one or 20240087Sbostic * more meta characters. 20340087Sbostic */ 20440087Sbostic static 20540087Sbostic glob2(pathbuf, pathend, pattern, pglob) 20640087Sbostic char *pathbuf, *pathend, *pattern; 20740087Sbostic glob_t *pglob; 20840087Sbostic { 20940087Sbostic struct stat sbuf; 21040087Sbostic bool_t anymeta = 0; 21140087Sbostic char *p, *q; 21240087Sbostic 21340087Sbostic /* 21440087Sbostic * loop over pattern segments until end of pattern or until 21540087Sbostic * segment with meta character found. 21640087Sbostic */ 21740087Sbostic for (;;) { 21840087Sbostic if (*pattern == EOS) { /* end of pattern? */ 21940087Sbostic *pathend = EOS; 22040087Sbostic if (stat(pathbuf, &sbuf) != 0) 22140087Sbostic return(0); /* need error call here? */ 22240087Sbostic if ((pglob->gl_flags & GLOB_MARK) && 22340087Sbostic pathend[-1] != SEP && S_ISDIR(sbuf.st_mode)) { 22440087Sbostic *pathend++ = SEP; 22540087Sbostic *pathend = EOS; 22640087Sbostic } 22740087Sbostic return(globextend(pathbuf, pglob)); 22840087Sbostic } 22940087Sbostic 23040087Sbostic /* find end of next segment, copy tentatively to pathend */ 23140087Sbostic q = pathend; 23240087Sbostic p = pattern; 23340087Sbostic while (*p != EOS && *p != SEP) { 23440087Sbostic if (ismeta(*p)) 23540087Sbostic anymeta = 1; 23640087Sbostic *q++ = *p++; 23740087Sbostic } 23840087Sbostic 23940087Sbostic if (!anymeta) { /* no expansion, do next segment */ 24040087Sbostic pathend = q; 24140087Sbostic pattern = p; 24240087Sbostic while (*pattern == SEP) 24340087Sbostic *pathend++ = *pattern++; 24440087Sbostic } else /* need expansion, recurse */ 24540087Sbostic return(glob3(pathbuf, pathend, pattern, p, pglob)); 24640087Sbostic } 24740087Sbostic /* NOTREACHED */ 24840087Sbostic } 24940087Sbostic 25040087Sbostic static 25140087Sbostic glob3(pathbuf, pathend, pattern, restpattern, pglob) 25240087Sbostic char *pathbuf, *pathend, *pattern, *restpattern; 25340087Sbostic glob_t *pglob; 25440087Sbostic { 25540087Sbostic extern int errno; 25640087Sbostic DIR *dirp; 25740087Sbostic struct dirent *dp; 25840087Sbostic int len, err; 25940087Sbostic 26040087Sbostic *pathend = EOS; 26140087Sbostic errno = 0; 26240087Sbostic if (!(dirp = opendir(pathbuf))) 26340087Sbostic /* todo: don't call for ENOENT or ENOTDIR? */ 26440087Sbostic if (pglob->gl_errfunc && 26540087Sbostic (*pglob->gl_errfunc)(pathbuf, errno) || 26640087Sbostic (pglob->gl_flags & GLOB_ERR)) 26740087Sbostic return(GLOB_ABEND); 26840087Sbostic else 26940087Sbostic return(0); 27040087Sbostic 27140087Sbostic err = 0; 27240087Sbostic 27340087Sbostic /* search directory for matching names */ 27440087Sbostic while ((dp = readdir(dirp))) { 27540087Sbostic /* initial DOT must be matched literally */ 27640087Sbostic if (dp->d_name[0] == DOT && *pattern != DOT) 27740087Sbostic continue; 27840087Sbostic if (!match(dp->d_name, pattern, restpattern)) 27940087Sbostic continue; 28040087Sbostic len = dp->d_namlen; 28140087Sbostic (void)strcpy(pathend, dp->d_name); 28240087Sbostic err = glob2(pathbuf, pathend+len, restpattern, pglob); 28340087Sbostic if (err) 28440087Sbostic break; 28540087Sbostic } 28640087Sbostic /* todo: check error from readdir? */ 28740087Sbostic (void)closedir(dirp); 28840087Sbostic return(err); 28940087Sbostic } 29040087Sbostic 29140087Sbostic 29240087Sbostic /* 29340087Sbostic * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 29440087Sbostic * add the new item, and update gl_pathc. 29540087Sbostic * 29640087Sbostic * This assumes the BSD realloc, which only copies the block when its size 29740087Sbostic * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 29840087Sbostic * behavior. 29940087Sbostic * 30040087Sbostic * Return 0 if new item added, error code if memory couldn't be allocated. 30140087Sbostic * 30240087Sbostic * Invariant of the glob_t structure: 30340087Sbostic * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 30440087Sbostic * gl_pathv points to (gl_offs + gl_pathc + 1) items. 30540087Sbostic */ 30640087Sbostic static 30740087Sbostic globextend(path, pglob) 30840087Sbostic char *path; 30940087Sbostic glob_t *pglob; 31040087Sbostic { 31140087Sbostic register char **pathv; 31240087Sbostic register int i; 31340087Sbostic u_int copysize, newsize; 31440087Sbostic char *copy; 31540087Sbostic 31640087Sbostic newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 31740087Sbostic pathv = (char **)realloc((char *)(pathv = pglob->gl_pathv), newsize); 31840087Sbostic if (pathv == NULL) 31940087Sbostic return(GLOB_NOSPACE); 32040087Sbostic 32140087Sbostic if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 32240087Sbostic /* first time around -- clear initial gl_offs items */ 32340087Sbostic pathv += pglob->gl_offs; 32440087Sbostic for (i = pglob->gl_offs; --i >= 0; ) 32540087Sbostic *--pathv = NULL; 32640087Sbostic } 32740087Sbostic pglob->gl_pathv = pathv; 32840087Sbostic 32940087Sbostic copysize = strlen(path) + 1; 33040087Sbostic if ((copy = malloc(copysize)) != NULL) { 33140087Sbostic (void)strcpy(copy, path); 33240087Sbostic pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 33340087Sbostic } 33440087Sbostic pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 33540087Sbostic return((copy == NULL) ? GLOB_NOSPACE : 0); 33640087Sbostic } 33740087Sbostic 33840087Sbostic 33940087Sbostic /* 34040087Sbostic * pattern matching function for filenames. Each occurrence of the * 34140087Sbostic * pattern causes a recursion level. 34240087Sbostic */ 34340087Sbostic static bool_t 34440087Sbostic match(name, pat, patend) 34540087Sbostic register char *name, *pat, *patend; 34640087Sbostic { 34740087Sbostic bool_t ok, negate_range; 34840087Sbostic char c, k; 34940087Sbostic 35040087Sbostic while (pat < patend) { 35140087Sbostic c = *pat++; 35240087Sbostic switch (c & 0xff) { 35340087Sbostic case M_ALL: 35440087Sbostic if (pat == patend) 35540087Sbostic return(1); 35640087Sbostic for (; *name != EOS; ++name) { 35740087Sbostic if (match(name, pat, patend)) 35840087Sbostic return(1); 35940087Sbostic } 36040087Sbostic return(0); 36140087Sbostic case M_ONE: 36240087Sbostic if (*name++ == EOS) 36340087Sbostic return(0); 36440087Sbostic break; 36540087Sbostic case M_SET: 36640087Sbostic ok = 0; 36740087Sbostic k = *name++; 36840087Sbostic if (negate_range = (*pat & 0xff) == M_NOT) 36940087Sbostic ++pat; 37040087Sbostic while (((c = *pat++) & 0xff) != M_END) { 37140087Sbostic if ((*pat & 0xff) == M_RNG) { 37240087Sbostic if (c <= k && k <= pat[1]) 37340087Sbostic ok = 1; 37440087Sbostic pat += 2; 37540087Sbostic } 37640087Sbostic else if (c == k) 37740087Sbostic ok = 1; 37840087Sbostic } 37940087Sbostic if (ok == negate_range) 38040087Sbostic return(0); 38140087Sbostic break; 38240087Sbostic default: 38340087Sbostic if (*name++ != c) 38440087Sbostic return(0); 38540087Sbostic break; 38640087Sbostic } 38740087Sbostic } 38840087Sbostic return(*name == EOS); 38940087Sbostic } 39040087Sbostic 39140087Sbostic /* free allocated data belonging to a glob_t structure */ 39240087Sbostic void 39340087Sbostic globfree(pglob) 39440087Sbostic glob_t *pglob; 39540087Sbostic { 39640087Sbostic register int i; 39740087Sbostic register char **pp; 39840087Sbostic 39940087Sbostic if (pglob->gl_pathv != NULL) { 40040087Sbostic pp = pglob->gl_pathv + pglob->gl_offs; 40140087Sbostic for (i = pglob->gl_pathc; i--; ++pp) 40240087Sbostic if (*pp) 40340087Sbostic (void)free(*pp); 40440087Sbostic (void)free((char *)pp); 40540087Sbostic } 40640087Sbostic } 407