xref: /csrg-svn/lib/libc/gen/glob.c (revision 48540)
140087Sbostic /*
240087Sbostic  * Copyright (c) 1989 The Regents of the University of California.
340087Sbostic  * All rights reserved.
440087Sbostic  *
540087Sbostic  * This code is derived from software contributed to Berkeley by
640087Sbostic  * Guido van Rossum.
740087Sbostic  *
842625Sbostic  * %sccs.include.redist.c%
940087Sbostic  */
1040087Sbostic 
1140087Sbostic #if defined(LIBC_SCCS) && !defined(lint)
12*48540Sbostic static char sccsid[] = "@(#)glob.c	5.8 (Berkeley) 04/23/91";
1340087Sbostic #endif /* LIBC_SCCS and not lint */
1440087Sbostic 
1540087Sbostic /*
1640087Sbostic  * Glob: the interface is a superset of the one defined in POSIX 1003.2,
1740087Sbostic  * draft 9.
1840087Sbostic  *
1940087Sbostic  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
2040087Sbostic  *
2140087Sbostic  * Optional extra services, controlled by flags not defined by POSIX:
2247590Sbostic  *
2347590Sbostic  * GLOB_QUOTE:
2447590Sbostic  *	Escaping convention: \ inhibits any special meaning the following
2547590Sbostic  *	character might have (except \ at end of string is retained).
2647590Sbostic  * GLOB_MAGCHAR:
27*48540Sbostic  *	Set in gl_flags if pattern contained a globbing character.
2847590Sbostic  * gl_matchc:
2947590Sbostic  *	Number of matches in the current invocation of glob.
3040087Sbostic  */
3140087Sbostic 
32*48540Sbostic #include <sys/cdefs.h>
3340087Sbostic #include <sys/param.h>
3440087Sbostic #include <sys/stat.h>
3540087Sbostic #include <dirent.h>
3640087Sbostic #include <glob.h>
3740087Sbostic #include <ctype.h>
3840087Sbostic #include <errno.h>
3940087Sbostic #include <string.h>
4040087Sbostic #include <stdio.h>
4146597Sdonn #include <stdlib.h>
4240087Sbostic 
43*48540Sbostic static int glob1(), glob2(), glob3(), globextend(), match();
4440087Sbostic 
4540087Sbostic #define	DOLLAR		'$'
4640087Sbostic #define	DOT		'.'
4740087Sbostic #define	EOS		'\0'
4840087Sbostic #define	LBRACKET	'['
4940087Sbostic #define	NOT		'!'
5040087Sbostic #define	QUESTION	'?'
5140087Sbostic #define	QUOTE		'\\'
5240087Sbostic #define	RANGE		'-'
5340087Sbostic #define	RBRACKET	']'
5440087Sbostic #define	SEP		'/'
5540087Sbostic #define	STAR		'*'
5640087Sbostic #define	TILDE		'~'
5740087Sbostic #define	UNDERSCORE	'_'
5840087Sbostic 
59*48540Sbostic #define	SHORT_STRINGS
60*48540Sbostic #ifdef SHORT_STRINGS
61*48540Sbostic typedef u_short shortchar_t;
62*48540Sbostic #define	METABIT		0x8000
63*48540Sbostic #define	M_MASK		0xffff
64*48540Sbostic #else
65*48540Sbostic typedef char shortchar_t;
6640087Sbostic #define	METABIT		0x80
67*48540Sbostic #define	M_MASK		0xff
68*48540Sbostic #endif
69*48540Sbostic 
7040087Sbostic #define	META(c)		((c)|METABIT)
7140087Sbostic #define	M_ALL		META('*')
7240087Sbostic #define	M_END		META(']')
7340087Sbostic #define	M_NOT		META('!')
7440087Sbostic #define	M_ONE		META('?')
7540087Sbostic #define	M_RNG		META('-')
7640087Sbostic #define	M_SET		META('[')
7740087Sbostic #define	ismeta(c)	(((c)&METABIT) != 0)
7840087Sbostic 
79*48540Sbostic #ifdef SHORT_STRINGS
80*48540Sbostic static DIR *
81*48540Sbostic Opendir(str)
82*48540Sbostic register shortchar_t *str;
83*48540Sbostic {
84*48540Sbostic 	register char *dc;
85*48540Sbostic 	char buf[MAXPATHLEN];
86*48540Sbostic 
87*48540Sbostic 	if (!*str)
88*48540Sbostic 		return(opendir("."));
89*48540Sbostic 	for (dc = buf; *dc++ = *str++;);
90*48540Sbostic 	return(opendir(buf));
91*48540Sbostic }
92*48540Sbostic 
93*48540Sbostic static int
94*48540Sbostic Lstat(fn, sb)
95*48540Sbostic register shortchar_t *fn;
96*48540Sbostic struct stat *sb;
97*48540Sbostic {
98*48540Sbostic 	register char *dc;
99*48540Sbostic 	char buf[MAXPATHLEN];
100*48540Sbostic 
101*48540Sbostic 	for (dc = buf; *dc++ = *fn++;);
102*48540Sbostic 	return(lstat(buf, sb));
103*48540Sbostic }
104*48540Sbostic 
105*48540Sbostic static int
106*48540Sbostic Stat(fn, sb)
107*48540Sbostic register shortchar_t *fn;
108*48540Sbostic struct stat *sb;
109*48540Sbostic {
110*48540Sbostic 	register char *dc;
111*48540Sbostic 	char buf[MAXPATHLEN];
112*48540Sbostic 
113*48540Sbostic 	for (dc = buf; *dc++ = *fn++;);
114*48540Sbostic 	return(stat(buf, sb));
115*48540Sbostic }
116*48540Sbostic #else
117*48540Sbostic #define	Opendir	opendir
118*48540Sbostic #define	Stat	stat
119*48540Sbostic #define	Lstat	lstat
120*48540Sbostic #endif
121*48540Sbostic 
122*48540Sbostic static int
12340087Sbostic compare(p, q)
12443531Sbostic 	void **p, **q;
12540087Sbostic {
12643531Sbostic 	return(strcmp(*(char **)p, *(char **)q));
12740087Sbostic }
12840087Sbostic 
12940087Sbostic /*
13040087Sbostic  * The main glob() routine: compiles the pattern (optionally processing
13140087Sbostic  * quotes), calls glob1() to do the real pattern matching, and finally
13240087Sbostic  * sorts the list (unless unsorted operation is requested).  Returns 0
13340087Sbostic  * if things went well, nonzero if errors occurred.  It is not an error
13440087Sbostic  * to find no matches.
13540087Sbostic  */
13640087Sbostic glob(pattern, flags, errfunc, pglob)
13746597Sdonn 	const char *pattern;
13846597Sdonn 	int flags;
13946597Sdonn 	int (*errfunc) __P((char *, int));
14040087Sbostic 	glob_t *pglob;
14140087Sbostic {
14240087Sbostic 	int err, oldpathc;
143*48540Sbostic 	shortchar_t *bufnext, *bufend, *compilebuf;
14446597Sdonn 	const char *compilepat, *patnext;
145*48540Sbostic 	char c;
146*48540Sbostic 	shortchar_t patbuf[MAXPATHLEN+1];
14740087Sbostic 
14840087Sbostic 	patnext = pattern;
14940087Sbostic 	if (!(flags & GLOB_APPEND)) {
15040087Sbostic 		pglob->gl_pathc = 0;
15140087Sbostic 		pglob->gl_pathv = NULL;
15240087Sbostic 		if (!(flags & GLOB_DOOFFS))
15340087Sbostic 			pglob->gl_offs = 0;
15440087Sbostic 	}
15547590Sbostic 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
15640087Sbostic 	pglob->gl_errfunc = errfunc;
15740087Sbostic 	oldpathc = pglob->gl_pathc;
15847590Sbostic 	pglob->gl_matchc = 0;
15940087Sbostic 
16040087Sbostic 	bufnext = patbuf;
16140087Sbostic 	bufend = bufnext+MAXPATHLEN;
16240087Sbostic 
16340087Sbostic 	compilebuf = bufnext;
16440087Sbostic 	compilepat = patnext;
16540087Sbostic 	while (bufnext < bufend && (c = *patnext++) != EOS) {
16640087Sbostic 		switch (c) {
16740087Sbostic 		case LBRACKET:
16847590Sbostic 			pglob->gl_flags |= GLOB_MAGCHAR;
16940087Sbostic 			c = *patnext;
17040087Sbostic 			if (c == NOT)
17140087Sbostic 				++patnext;
17240087Sbostic 			if (*patnext == EOS ||
17340087Sbostic 			    strchr(patnext+1, RBRACKET) == NULL) {
17440087Sbostic 				*bufnext++ = LBRACKET;
17540087Sbostic 				if (c == NOT)
17640087Sbostic 					--patnext;
17740087Sbostic 				break;
17840087Sbostic 			}
17940087Sbostic 			*bufnext++ = M_SET;
18040087Sbostic 			if (c == NOT)
18140087Sbostic 				*bufnext++ = M_NOT;
18240087Sbostic 			c = *patnext++;
18340087Sbostic 			do {
18440087Sbostic 				/* todo: quoting */
18540087Sbostic 				*bufnext++ = c;
18640087Sbostic 				if (*patnext == RANGE &&
18740087Sbostic 				    (c = patnext[1]) != RBRACKET) {
18840087Sbostic 					*bufnext++ = M_RNG;
18940087Sbostic 					*bufnext++ = c;
19040087Sbostic 					patnext += 2;
19140087Sbostic 				}
19240087Sbostic 			} while ((c = *patnext++) != RBRACKET);
19340087Sbostic 			*bufnext++ = M_END;
19440087Sbostic 			break;
19540087Sbostic 		case QUESTION:
19647590Sbostic 			pglob->gl_flags |= GLOB_MAGCHAR;
19740087Sbostic 			*bufnext++ = M_ONE;
19840087Sbostic 			break;
19940087Sbostic 		case QUOTE:
20040087Sbostic 			if (!(flags & GLOB_QUOTE))
20140087Sbostic 				*bufnext++ = QUOTE;
20240087Sbostic 			else {
20340087Sbostic 				if ((c = *patnext++) == EOS) {
20440087Sbostic 					c = QUOTE;
20540087Sbostic 					--patnext;
20640087Sbostic 				}
20740087Sbostic 				*bufnext++ = c;
20840087Sbostic 			}
20940087Sbostic 			break;
21040087Sbostic 		case STAR:
21147590Sbostic 			pglob->gl_flags |= GLOB_MAGCHAR;
21240087Sbostic 			*bufnext++ = M_ALL;
21340087Sbostic 			break;
21440087Sbostic 		default:
21540087Sbostic 			*bufnext++ = c;
21640087Sbostic 			break;
21740087Sbostic 		}
21840087Sbostic 	}
21940087Sbostic 	*bufnext = EOS;
22040087Sbostic 
22140087Sbostic 	if ((err = glob1(patbuf, pglob)) != 0)
22240087Sbostic 		return(err);
22340087Sbostic 
22440087Sbostic 	if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
225*48540Sbostic 		if (!(flags & GLOB_QUOTE)) {
226*48540Sbostic 			shortchar_t *dp = compilebuf;
227*48540Sbostic 			const char *sp = compilepat;
228*48540Sbostic 			while (*dp++ = (u_char)*sp++);
229*48540Sbostic 		}
23040087Sbostic 		else {
23140087Sbostic 			/*
23240087Sbostic 			 * copy pattern, interpreting quotes; this is slightly
23340087Sbostic 			 * different than the interpretation of quotes above
23440087Sbostic 			 * -- which should prevail?
23540087Sbostic 			 */
23640087Sbostic 			while (*compilepat != EOS) {
23740087Sbostic 				if (*compilepat == QUOTE) {
23840087Sbostic 					if (*++compilepat == EOS)
23940087Sbostic 						--compilepat;
24040087Sbostic 				}
241*48540Sbostic 				*compilebuf++ = (u_char)*compilepat++;
24240087Sbostic 			}
24340087Sbostic 			*compilebuf = EOS;
24440087Sbostic 		}
24540087Sbostic 		return(globextend(patbuf, pglob));
246*48540Sbostic 	} else if (!(flags & GLOB_NOSORT))
247*48540Sbostic 		qsort((char*)(pglob->gl_pathv + pglob->gl_offs + oldpathc),
24840087Sbostic 		    pglob->gl_pathc - oldpathc, sizeof(char*), compare);
24940087Sbostic 	return(0);
25040087Sbostic }
25140087Sbostic 
25240087Sbostic static
25340087Sbostic glob1(pattern, pglob)
254*48540Sbostic 	shortchar_t *pattern;
25540087Sbostic 	glob_t *pglob;
25640087Sbostic {
257*48540Sbostic 	shortchar_t pathbuf[MAXPATHLEN+1];
25840087Sbostic 
25940087Sbostic 	/*
260*48540Sbostic 	 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4.
261*48540Sbostic 	 */
26240087Sbostic 	if (*pattern == EOS)
26340087Sbostic 		return(0);
26440087Sbostic 	return(glob2(pathbuf, pathbuf, pattern, pglob));
26540087Sbostic }
26640087Sbostic 
26740087Sbostic /*
26840087Sbostic  * functions glob2 and glob3 are mutually recursive; there is one level
26940087Sbostic  * of recursion for each segment in the pattern that contains one or
27040087Sbostic  * more meta characters.
27140087Sbostic  */
27240087Sbostic static
27340087Sbostic glob2(pathbuf, pathend, pattern, pglob)
274*48540Sbostic 	shortchar_t *pathbuf, *pathend, *pattern;
27540087Sbostic 	glob_t *pglob;
27640087Sbostic {
277*48540Sbostic 	struct stat sbuf;
278*48540Sbostic 	shortchar_t *p, *q;
279*48540Sbostic 	int anymeta;
28040087Sbostic 
28140087Sbostic 	/*
28240087Sbostic 	 * loop over pattern segments until end of pattern or until
28340087Sbostic 	 * segment with meta character found.
28440087Sbostic 	 */
28548357Sbostic 	for (anymeta = 0;;) {
28640087Sbostic 		if (*pattern == EOS) {		/* end of pattern? */
28740087Sbostic 			*pathend = EOS;
288*48540Sbostic 			if (Lstat(pathbuf, &sbuf))
28948357Sbostic 				return(0);
290*48540Sbostic 
291*48540Sbostic 			if (((pglob->gl_flags & GLOB_MARK) &&
292*48540Sbostic 			    pathend[-1] != SEP) && (S_ISDIR(sbuf.st_mode)
293*48540Sbostic 			    || (S_ISLNK(sbuf.st_mode) &&
294*48540Sbostic 			    (Stat(pathbuf, &sbuf) == 0) &&
295*48540Sbostic 			    S_ISDIR(sbuf.st_mode)))) {
29640087Sbostic 				*pathend++ = SEP;
29740087Sbostic 				*pathend = EOS;
29840087Sbostic 			}
29947590Sbostic 			++pglob->gl_matchc;
30040087Sbostic 			return(globextend(pathbuf, pglob));
30140087Sbostic 		}
30240087Sbostic 
30340087Sbostic 		/* find end of next segment, copy tentatively to pathend */
30440087Sbostic 		q = pathend;
30540087Sbostic 		p = pattern;
30640087Sbostic 		while (*p != EOS && *p != SEP) {
30740087Sbostic 			if (ismeta(*p))
30840087Sbostic 				anymeta = 1;
30940087Sbostic 			*q++ = *p++;
31040087Sbostic 		}
31140087Sbostic 
31240087Sbostic 		if (!anymeta) {		/* no expansion, do next segment */
31340087Sbostic 			pathend = q;
31440087Sbostic 			pattern = p;
31540087Sbostic 			while (*pattern == SEP)
31640087Sbostic 				*pathend++ = *pattern++;
31740087Sbostic 		} else			/* need expansion, recurse */
31840087Sbostic 			return(glob3(pathbuf, pathend, pattern, p, pglob));
31940087Sbostic 	}
32040087Sbostic 	/* NOTREACHED */
32140087Sbostic }
32240087Sbostic 
323*48540Sbostic 
32440087Sbostic static
32540087Sbostic glob3(pathbuf, pathend, pattern, restpattern, pglob)
326*48540Sbostic 	shortchar_t *pathbuf, *pathend, *pattern, *restpattern;
32740087Sbostic 	glob_t *pglob;
32840087Sbostic {
32940087Sbostic 	extern int errno;
33040087Sbostic 	DIR *dirp;
33140087Sbostic 	struct dirent *dp;
33240087Sbostic 	int len, err;
33340087Sbostic 
33440087Sbostic 	*pathend = EOS;
33540087Sbostic 	errno = 0;
336*48540Sbostic 
337*48540Sbostic 	if (!(dirp = Opendir(pathbuf)))
33840087Sbostic 		/* todo: don't call for ENOENT or ENOTDIR? */
33940087Sbostic 		if (pglob->gl_errfunc &&
34040087Sbostic 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
34140087Sbostic 		    (pglob->gl_flags & GLOB_ERR))
34240087Sbostic 			return(GLOB_ABEND);
34340087Sbostic 		else
34440087Sbostic 			return(0);
34540087Sbostic 
34640087Sbostic 	err = 0;
34740087Sbostic 
34840087Sbostic 	/* search directory for matching names */
34940087Sbostic 	while ((dp = readdir(dirp))) {
350*48540Sbostic 		register char *sc;
351*48540Sbostic 		register shortchar_t *dc;
35240087Sbostic 		/* initial DOT must be matched literally */
35340087Sbostic 		if (dp->d_name[0] == DOT && *pattern != DOT)
35440087Sbostic 			continue;
355*48540Sbostic 		for (sc = dp->d_name, dc = pathend;
356*48540Sbostic 		     *dc++ = (u_char)*sc++;);
357*48540Sbostic 		if (!match(pathend, pattern, restpattern)) {
358*48540Sbostic 			*pathend = EOS;
35940087Sbostic 			continue;
360*48540Sbostic 		}
361*48540Sbostic 		err = glob2(pathbuf, --dc, restpattern, pglob);
36240087Sbostic 		if (err)
36340087Sbostic 			break;
36440087Sbostic 	}
36540087Sbostic 	/* todo: check error from readdir? */
36640087Sbostic 	(void)closedir(dirp);
36740087Sbostic 	return(err);
36840087Sbostic }
36940087Sbostic 
37040087Sbostic 
37140087Sbostic /*
37240087Sbostic  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
37340087Sbostic  * add the new item, and update gl_pathc.
37440087Sbostic  *
37540087Sbostic  * This assumes the BSD realloc, which only copies the block when its size
37640087Sbostic  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
37740087Sbostic  * behavior.
37840087Sbostic  *
37940087Sbostic  * Return 0 if new item added, error code if memory couldn't be allocated.
38040087Sbostic  *
38140087Sbostic  * Invariant of the glob_t structure:
38240087Sbostic  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
38340087Sbostic  *	 gl_pathv points to (gl_offs + gl_pathc + 1) items.
38440087Sbostic  */
385*48540Sbostic static int
38640087Sbostic globextend(path, pglob)
387*48540Sbostic 	shortchar_t *path;
38840087Sbostic 	glob_t *pglob;
38940087Sbostic {
39040087Sbostic 	register char **pathv;
39140087Sbostic 	register int i;
392*48540Sbostic 	u_int newsize;
39340087Sbostic 	char *copy;
394*48540Sbostic 	shortchar_t *p;
39540087Sbostic 
39640087Sbostic 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
397*48540Sbostic 	pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
39840087Sbostic 	if (pathv == NULL)
39940087Sbostic 		return(GLOB_NOSPACE);
40040087Sbostic 
40140087Sbostic 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
40240087Sbostic 		/* first time around -- clear initial gl_offs items */
40340087Sbostic 		pathv += pglob->gl_offs;
40440087Sbostic 		for (i = pglob->gl_offs; --i >= 0; )
40540087Sbostic 			*--pathv = NULL;
40640087Sbostic 	}
40740087Sbostic 	pglob->gl_pathv = pathv;
40840087Sbostic 
409*48540Sbostic 	for (p = path; *p++;);
410*48540Sbostic 	if ((copy = malloc(p - path)) != NULL) {
411*48540Sbostic 		register char *dc = copy;
412*48540Sbostic 		register shortchar_t *sc = path;
413*48540Sbostic 		while (*dc++ = *sc++);
41440087Sbostic 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
41540087Sbostic 	}
41640087Sbostic 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
41740087Sbostic 	return((copy == NULL) ? GLOB_NOSPACE : 0);
41840087Sbostic }
41940087Sbostic 
42040087Sbostic 
42140087Sbostic /*
42240087Sbostic  * pattern matching function for filenames.  Each occurrence of the *
42340087Sbostic  * pattern causes a recursion level.
42440087Sbostic  */
425*48540Sbostic static
42640087Sbostic match(name, pat, patend)
427*48540Sbostic 	register shortchar_t *name, *pat, *patend;
42840087Sbostic {
429*48540Sbostic 	int ok, negate_range;
430*48540Sbostic 	shortchar_t c, k;
43140087Sbostic 
43240087Sbostic 	while (pat < patend) {
43340087Sbostic 		c = *pat++;
434*48540Sbostic 		switch (c & M_MASK) {
43540087Sbostic 		case M_ALL:
43640087Sbostic 			if (pat == patend)
43740087Sbostic 				return(1);
43840087Sbostic 			for (; *name != EOS; ++name) {
43940087Sbostic 				if (match(name, pat, patend))
44040087Sbostic 					return(1);
44140087Sbostic 			}
44240087Sbostic 			return(0);
44340087Sbostic 		case M_ONE:
44440087Sbostic 			if (*name++ == EOS)
44540087Sbostic 				return(0);
44640087Sbostic 			break;
44740087Sbostic 		case M_SET:
44840087Sbostic 			ok = 0;
44940087Sbostic 			k = *name++;
450*48540Sbostic 			if (negate_range = ((*pat & M_MASK) == M_NOT))
45140087Sbostic 				++pat;
452*48540Sbostic 			while (((c = *pat++) & M_MASK) != M_END) {
453*48540Sbostic 				if ((*pat & M_MASK) == M_RNG) {
45440087Sbostic 					if (c <= k && k <= pat[1])
45540087Sbostic 						ok = 1;
45640087Sbostic 					pat += 2;
45740087Sbostic 				}
45840087Sbostic 				else if (c == k)
45940087Sbostic 					ok = 1;
46040087Sbostic 			}
46140087Sbostic 			if (ok == negate_range)
46240087Sbostic 				return(0);
46340087Sbostic 			break;
46440087Sbostic 		default:
46540087Sbostic 			if (*name++ != c)
46640087Sbostic 				return(0);
46740087Sbostic 			break;
46840087Sbostic 		}
46940087Sbostic 	}
47040087Sbostic 	return(*name == EOS);
47140087Sbostic }
47240087Sbostic 
47340087Sbostic /* free allocated data belonging to a glob_t structure */
47440087Sbostic void
47540087Sbostic globfree(pglob)
47640087Sbostic 	glob_t *pglob;
47740087Sbostic {
47840087Sbostic 	register int i;
47940087Sbostic 	register char **pp;
48040087Sbostic 
48140087Sbostic 	if (pglob->gl_pathv != NULL) {
48240087Sbostic 		pp = pglob->gl_pathv + pglob->gl_offs;
48340087Sbostic 		for (i = pglob->gl_pathc; i--; ++pp)
48440087Sbostic 			if (*pp)
48547736Sbostic 				free((void *)*pp);
48647736Sbostic 		free((void *)pglob->gl_pathv);
48740087Sbostic 	}
48840087Sbostic }
489