xref: /csrg-svn/lib/libc/gen/glob.c (revision 48357)
140087Sbostic /*
240087Sbostic  * Copyright (c) 1989 The Regents of the University of California.
340087Sbostic  * All rights reserved.
440087Sbostic  *
540087Sbostic  * This code is derived from software contributed to Berkeley by
640087Sbostic  * Guido van Rossum.
740087Sbostic  *
842625Sbostic  * %sccs.include.redist.c%
940087Sbostic  */
1040087Sbostic 
1140087Sbostic #if defined(LIBC_SCCS) && !defined(lint)
12*48357Sbostic static char sccsid[] = "@(#)glob.c	5.7 (Berkeley) 04/19/91";
1340087Sbostic #endif /* LIBC_SCCS and not lint */
1440087Sbostic 
1540087Sbostic /*
1640087Sbostic  * Glob: the interface is a superset of the one defined in POSIX 1003.2,
1740087Sbostic  * draft 9.
1840087Sbostic  *
1940087Sbostic  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
2040087Sbostic  *
2140087Sbostic  * Optional extra services, controlled by flags not defined by POSIX:
2247590Sbostic  *
2347590Sbostic  * GLOB_QUOTE:
2447590Sbostic  *	Escaping convention: \ inhibits any special meaning the following
2547590Sbostic  *	character might have (except \ at end of string is retained).
2647590Sbostic  * GLOB_MAGCHAR:
2747590Sbostic  *	Set in gl_flags is pattern contained a globbing character.
2847590Sbostic  * gl_matchc:
2947590Sbostic  *	Number of matches in the current invocation of glob.
3040087Sbostic  */
3140087Sbostic 
3240087Sbostic #include <sys/param.h>
3340087Sbostic #include <sys/stat.h>
3440087Sbostic #include <dirent.h>
3540087Sbostic #include <glob.h>
3640087Sbostic #include <ctype.h>
3740087Sbostic #include <errno.h>
3840087Sbostic #include <string.h>
3940087Sbostic #include <stdio.h>
4046597Sdonn #include <stdlib.h>
4140087Sbostic 
4240087Sbostic typedef int bool_t;
4340087Sbostic 
4446597Sdonn static glob1();
4546597Sdonn static glob2();
4646597Sdonn static glob3();
4746597Sdonn static globextend();
4846597Sdonn static bool_t match();
4946597Sdonn 
5040087Sbostic #define	DOLLAR		'$'
5140087Sbostic #define	DOT		'.'
5240087Sbostic #define	EOS		'\0'
5340087Sbostic #define	LBRACKET	'['
5440087Sbostic #define	NOT		'!'
5540087Sbostic #define	QUESTION	'?'
5640087Sbostic #define	QUOTE		'\\'
5740087Sbostic #define	RANGE		'-'
5840087Sbostic #define	RBRACKET	']'
5940087Sbostic #define	SEP		'/'
6040087Sbostic #define	STAR		'*'
6140087Sbostic #define	TILDE		'~'
6240087Sbostic #define	UNDERSCORE	'_'
6340087Sbostic 
6440087Sbostic #define	METABIT		0x80
6540087Sbostic #define	META(c)		((c)|METABIT)
6640087Sbostic #define	M_ALL		META('*')
6740087Sbostic #define	M_END		META(']')
6840087Sbostic #define	M_NOT		META('!')
6940087Sbostic #define	M_ONE		META('?')
7040087Sbostic #define	M_RNG		META('-')
7140087Sbostic #define	M_SET		META('[')
7240087Sbostic #define	ismeta(c)	(((c)&METABIT) != 0)
7340087Sbostic 
7440087Sbostic static
7540087Sbostic compare(p, q)
7643531Sbostic 	void **p, **q;
7740087Sbostic {
7843531Sbostic 	return(strcmp(*(char **)p, *(char **)q));
7940087Sbostic }
8040087Sbostic 
8140087Sbostic 
8240087Sbostic /*
8340087Sbostic  * The main glob() routine: compiles the pattern (optionally processing
8440087Sbostic  * quotes), calls glob1() to do the real pattern matching, and finally
8540087Sbostic  * sorts the list (unless unsorted operation is requested).  Returns 0
8640087Sbostic  * if things went well, nonzero if errors occurred.  It is not an error
8740087Sbostic  * to find no matches.
8840087Sbostic  */
8940087Sbostic glob(pattern, flags, errfunc, pglob)
9046597Sdonn 	const char *pattern;
9146597Sdonn 	int flags;
9246597Sdonn 	int (*errfunc) __P((char *, int));
9340087Sbostic 	glob_t *pglob;
9440087Sbostic {
9540087Sbostic 	int err, oldpathc;
9646597Sdonn 	char *bufnext, *bufend, *compilebuf;
9746597Sdonn 	const char *compilepat, *patnext;
9840087Sbostic 	char c, patbuf[MAXPATHLEN+1];
9940087Sbostic 
10040087Sbostic 	patnext = pattern;
10140087Sbostic 	if (!(flags & GLOB_APPEND)) {
10240087Sbostic 		pglob->gl_pathc = 0;
10340087Sbostic 		pglob->gl_pathv = NULL;
10440087Sbostic 		if (!(flags & GLOB_DOOFFS))
10540087Sbostic 			pglob->gl_offs = 0;
10640087Sbostic 	}
10747590Sbostic 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
10840087Sbostic 	pglob->gl_errfunc = errfunc;
10940087Sbostic 	oldpathc = pglob->gl_pathc;
11047590Sbostic 	pglob->gl_matchc = 0;
11140087Sbostic 
11240087Sbostic 	bufnext = patbuf;
11340087Sbostic 	bufend = bufnext+MAXPATHLEN;
11440087Sbostic 
11540087Sbostic 	compilebuf = bufnext;
11640087Sbostic 	compilepat = patnext;
11740087Sbostic 	while (bufnext < bufend && (c = *patnext++) != EOS) {
11840087Sbostic 		switch (c) {
11940087Sbostic 		case LBRACKET:
12047590Sbostic 			pglob->gl_flags |= GLOB_MAGCHAR;
12140087Sbostic 			c = *patnext;
12240087Sbostic 			if (c == NOT)
12340087Sbostic 				++patnext;
12440087Sbostic 			if (*patnext == EOS ||
12540087Sbostic 			    strchr(patnext+1, RBRACKET) == NULL) {
12640087Sbostic 				*bufnext++ = LBRACKET;
12740087Sbostic 				if (c == NOT)
12840087Sbostic 					--patnext;
12940087Sbostic 				break;
13040087Sbostic 			}
13140087Sbostic 			*bufnext++ = M_SET;
13240087Sbostic 			if (c == NOT)
13340087Sbostic 				*bufnext++ = M_NOT;
13440087Sbostic 			c = *patnext++;
13540087Sbostic 			do {
13640087Sbostic 				/* todo: quoting */
13740087Sbostic 				*bufnext++ = c;
13840087Sbostic 				if (*patnext == RANGE &&
13940087Sbostic 				    (c = patnext[1]) != RBRACKET) {
14040087Sbostic 					*bufnext++ = M_RNG;
14140087Sbostic 					*bufnext++ = c;
14240087Sbostic 					patnext += 2;
14340087Sbostic 				}
14440087Sbostic 			} while ((c = *patnext++) != RBRACKET);
14540087Sbostic 			*bufnext++ = M_END;
14640087Sbostic 			break;
14740087Sbostic 		case QUESTION:
14847590Sbostic 			pglob->gl_flags |= GLOB_MAGCHAR;
14940087Sbostic 			*bufnext++ = M_ONE;
15040087Sbostic 			break;
15140087Sbostic 		case QUOTE:
15240087Sbostic 			if (!(flags & GLOB_QUOTE))
15340087Sbostic 				*bufnext++ = QUOTE;
15440087Sbostic 			else {
15540087Sbostic 				if ((c = *patnext++) == EOS) {
15640087Sbostic 					c = QUOTE;
15740087Sbostic 					--patnext;
15840087Sbostic 				}
15940087Sbostic 				*bufnext++ = c;
16040087Sbostic 			}
16140087Sbostic 			break;
16240087Sbostic 		case STAR:
16347590Sbostic 			pglob->gl_flags |= GLOB_MAGCHAR;
16440087Sbostic 			*bufnext++ = M_ALL;
16540087Sbostic 			break;
16640087Sbostic 		default:
16740087Sbostic 			*bufnext++ = c;
16840087Sbostic 			break;
16940087Sbostic 		}
17040087Sbostic 	}
17140087Sbostic 	*bufnext = EOS;
17240087Sbostic 
17340087Sbostic 	if ((err = glob1(patbuf, pglob)) != 0)
17440087Sbostic 		return(err);
17540087Sbostic 
17640087Sbostic 	if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
17740087Sbostic 		if (!(flags & GLOB_QUOTE))
17840087Sbostic 			(void)strcpy(compilebuf, compilepat);
17940087Sbostic 		else {
18040087Sbostic 			/*
18140087Sbostic 			 * copy pattern, interpreting quotes; this is slightly
18240087Sbostic 			 * different than the interpretation of quotes above
18340087Sbostic 			 * -- which should prevail?
18440087Sbostic 			 */
18540087Sbostic 			while (*compilepat != EOS) {
18640087Sbostic 				if (*compilepat == QUOTE) {
18740087Sbostic 					if (*++compilepat == EOS)
18840087Sbostic 						--compilepat;
18940087Sbostic 				}
19040087Sbostic 				*compilebuf++ = *compilepat++;
19140087Sbostic 			}
19240087Sbostic 			*compilebuf = EOS;
19340087Sbostic 		}
19440087Sbostic 		return(globextend(patbuf, pglob));
19540087Sbostic 	} else if (!(flags & GLOB_NOSORT))
19640087Sbostic 		qsort((char*) (pglob->gl_pathv + pglob->gl_offs + oldpathc),
19740087Sbostic 		    pglob->gl_pathc - oldpathc, sizeof(char*), compare);
19840087Sbostic 	return(0);
19940087Sbostic }
20040087Sbostic 
20140087Sbostic static
20240087Sbostic glob1(pattern, pglob)
20340087Sbostic 	char *pattern;
20440087Sbostic 	glob_t *pglob;
20540087Sbostic {
20640087Sbostic 	char pathbuf[MAXPATHLEN+1];
20740087Sbostic 
20840087Sbostic 	/*
20940087Sbostic 	 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
21040087Sbostic 	if (*pattern == EOS)
21140087Sbostic 		return(0);
21240087Sbostic 	return(glob2(pathbuf, pathbuf, pattern, pglob));
21340087Sbostic }
21440087Sbostic 
21540087Sbostic /*
21640087Sbostic  * functions glob2 and glob3 are mutually recursive; there is one level
21740087Sbostic  * of recursion for each segment in the pattern that contains one or
21840087Sbostic  * more meta characters.
21940087Sbostic  */
22040087Sbostic static
22140087Sbostic glob2(pathbuf, pathend, pattern, pglob)
22240087Sbostic 	char *pathbuf, *pathend, *pattern;
22340087Sbostic 	glob_t *pglob;
22440087Sbostic {
225*48357Sbostic 	struct stat sb;
226*48357Sbostic 	bool_t anymeta;
22740087Sbostic 	char *p, *q;
22840087Sbostic 
22940087Sbostic 	/*
23040087Sbostic 	 * loop over pattern segments until end of pattern or until
23140087Sbostic 	 * segment with meta character found.
23240087Sbostic 	 */
233*48357Sbostic 	for (anymeta = 0;;) {
23440087Sbostic 		if (*pattern == EOS) {		/* end of pattern? */
23540087Sbostic 			*pathend = EOS;
236*48357Sbostic 			if (lstat(pathbuf, &sb))
237*48357Sbostic 				return(0);
238*48357Sbostic 			if (pglob->gl_flags & GLOB_MARK && pathend[-1] != SEP &&
239*48357Sbostic 			    (S_ISDIR(sb.st_mode) || S_ISLNK(sb.st_mode) &&
240*48357Sbostic 			    !stat(pathbuf, &sb) && S_ISDIR(sb.st_mode))) {
24140087Sbostic 				*pathend++ = SEP;
24240087Sbostic 				*pathend = EOS;
24340087Sbostic 			}
24447590Sbostic 			++pglob->gl_matchc;
24540087Sbostic 			return(globextend(pathbuf, pglob));
24640087Sbostic 		}
24740087Sbostic 
24840087Sbostic 		/* find end of next segment, copy tentatively to pathend */
24940087Sbostic 		q = pathend;
25040087Sbostic 		p = pattern;
25140087Sbostic 		while (*p != EOS && *p != SEP) {
25240087Sbostic 			if (ismeta(*p))
25340087Sbostic 				anymeta = 1;
25440087Sbostic 			*q++ = *p++;
25540087Sbostic 		}
25640087Sbostic 
25740087Sbostic 		if (!anymeta) {		/* no expansion, do next segment */
25840087Sbostic 			pathend = q;
25940087Sbostic 			pattern = p;
26040087Sbostic 			while (*pattern == SEP)
26140087Sbostic 				*pathend++ = *pattern++;
26240087Sbostic 		} else			/* need expansion, recurse */
26340087Sbostic 			return(glob3(pathbuf, pathend, pattern, p, pglob));
26440087Sbostic 	}
26540087Sbostic 	/* NOTREACHED */
26640087Sbostic }
26740087Sbostic 
26840087Sbostic static
26940087Sbostic glob3(pathbuf, pathend, pattern, restpattern, pglob)
27040087Sbostic 	char *pathbuf, *pathend, *pattern, *restpattern;
27140087Sbostic 	glob_t *pglob;
27240087Sbostic {
27340087Sbostic 	extern int errno;
27440087Sbostic 	DIR *dirp;
27540087Sbostic 	struct dirent *dp;
27640087Sbostic 	int len, err;
27740087Sbostic 
27840087Sbostic 	*pathend = EOS;
27940087Sbostic 	errno = 0;
28040087Sbostic 	if (!(dirp = opendir(pathbuf)))
28140087Sbostic 		/* todo: don't call for ENOENT or ENOTDIR? */
28240087Sbostic 		if (pglob->gl_errfunc &&
28340087Sbostic 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
28440087Sbostic 		    (pglob->gl_flags & GLOB_ERR))
28540087Sbostic 			return(GLOB_ABEND);
28640087Sbostic 		else
28740087Sbostic 			return(0);
28840087Sbostic 
28940087Sbostic 	err = 0;
29040087Sbostic 
29140087Sbostic 	/* search directory for matching names */
29240087Sbostic 	while ((dp = readdir(dirp))) {
29340087Sbostic 		/* initial DOT must be matched literally */
29440087Sbostic 		if (dp->d_name[0] == DOT && *pattern != DOT)
29540087Sbostic 			continue;
29640087Sbostic 		if (!match(dp->d_name, pattern, restpattern))
29740087Sbostic 			continue;
29840087Sbostic 		len = dp->d_namlen;
29940087Sbostic 		(void)strcpy(pathend, dp->d_name);
30040087Sbostic 		err = glob2(pathbuf, pathend+len, restpattern, pglob);
30140087Sbostic 		if (err)
30240087Sbostic 			break;
30340087Sbostic 	}
30440087Sbostic 	/* todo: check error from readdir? */
30540087Sbostic 	(void)closedir(dirp);
30640087Sbostic 	return(err);
30740087Sbostic }
30840087Sbostic 
30940087Sbostic 
31040087Sbostic /*
31140087Sbostic  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
31240087Sbostic  * add the new item, and update gl_pathc.
31340087Sbostic  *
31440087Sbostic  * This assumes the BSD realloc, which only copies the block when its size
31540087Sbostic  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
31640087Sbostic  * behavior.
31740087Sbostic  *
31840087Sbostic  * Return 0 if new item added, error code if memory couldn't be allocated.
31940087Sbostic  *
32040087Sbostic  * Invariant of the glob_t structure:
32140087Sbostic  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
32240087Sbostic  *	 gl_pathv points to (gl_offs + gl_pathc + 1) items.
32340087Sbostic  */
32440087Sbostic static
32540087Sbostic globextend(path, pglob)
32640087Sbostic 	char *path;
32740087Sbostic 	glob_t *pglob;
32840087Sbostic {
32940087Sbostic 	register char **pathv;
33040087Sbostic 	register int i;
33140087Sbostic 	u_int copysize, newsize;
33240087Sbostic 	char *copy;
33340087Sbostic 
33440087Sbostic 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
33540087Sbostic 	pathv = (char **)realloc((char *)(pathv = pglob->gl_pathv), newsize);
33640087Sbostic 	if (pathv == NULL)
33740087Sbostic 		return(GLOB_NOSPACE);
33840087Sbostic 
33940087Sbostic 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
34040087Sbostic 		/* first time around -- clear initial gl_offs items */
34140087Sbostic 		pathv += pglob->gl_offs;
34240087Sbostic 		for (i = pglob->gl_offs; --i >= 0; )
34340087Sbostic 			*--pathv = NULL;
34440087Sbostic 	}
34540087Sbostic 	pglob->gl_pathv = pathv;
34640087Sbostic 
34740087Sbostic 	copysize = strlen(path) + 1;
34840087Sbostic 	if ((copy = malloc(copysize)) != NULL) {
34940087Sbostic 		(void)strcpy(copy, path);
35040087Sbostic 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
35140087Sbostic 	}
35240087Sbostic 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
35340087Sbostic 	return((copy == NULL) ? GLOB_NOSPACE : 0);
35440087Sbostic }
35540087Sbostic 
35640087Sbostic 
35740087Sbostic /*
35840087Sbostic  * pattern matching function for filenames.  Each occurrence of the *
35940087Sbostic  * pattern causes a recursion level.
36040087Sbostic  */
36140087Sbostic static bool_t
36240087Sbostic match(name, pat, patend)
36340087Sbostic 	register char *name, *pat, *patend;
36440087Sbostic {
36540087Sbostic 	bool_t ok, negate_range;
36640087Sbostic 	char c, k;
36740087Sbostic 
36840087Sbostic 	while (pat < patend) {
36940087Sbostic 		c = *pat++;
37040087Sbostic 		switch (c & 0xff) {
37140087Sbostic 		case M_ALL:
37240087Sbostic 			if (pat == patend)
37340087Sbostic 				return(1);
37440087Sbostic 			for (; *name != EOS; ++name) {
37540087Sbostic 				if (match(name, pat, patend))
37640087Sbostic 					return(1);
37740087Sbostic 			}
37840087Sbostic 			return(0);
37940087Sbostic 		case M_ONE:
38040087Sbostic 			if (*name++ == EOS)
38140087Sbostic 				return(0);
38240087Sbostic 			break;
38340087Sbostic 		case M_SET:
38440087Sbostic 			ok = 0;
38540087Sbostic 			k = *name++;
38640087Sbostic 			if (negate_range = (*pat & 0xff) == M_NOT)
38740087Sbostic 				++pat;
38840087Sbostic 			while (((c = *pat++) & 0xff) != M_END) {
38940087Sbostic 				if ((*pat & 0xff) == M_RNG) {
39040087Sbostic 					if (c <= k && k <= pat[1])
39140087Sbostic 						ok = 1;
39240087Sbostic 					pat += 2;
39340087Sbostic 				}
39440087Sbostic 				else if (c == k)
39540087Sbostic 					ok = 1;
39640087Sbostic 			}
39740087Sbostic 			if (ok == negate_range)
39840087Sbostic 				return(0);
39940087Sbostic 			break;
40040087Sbostic 		default:
40140087Sbostic 			if (*name++ != c)
40240087Sbostic 				return(0);
40340087Sbostic 			break;
40440087Sbostic 		}
40540087Sbostic 	}
40640087Sbostic 	return(*name == EOS);
40740087Sbostic }
40840087Sbostic 
40940087Sbostic /* free allocated data belonging to a glob_t structure */
41040087Sbostic void
41140087Sbostic globfree(pglob)
41240087Sbostic 	glob_t *pglob;
41340087Sbostic {
41440087Sbostic 	register int i;
41540087Sbostic 	register char **pp;
41640087Sbostic 
41740087Sbostic 	if (pglob->gl_pathv != NULL) {
41840087Sbostic 		pp = pglob->gl_pathv + pglob->gl_offs;
41940087Sbostic 		for (i = pglob->gl_pathc; i--; ++pp)
42040087Sbostic 			if (*pp)
42147736Sbostic 				free((void *)*pp);
42247736Sbostic 		free((void *)pglob->gl_pathv);
42340087Sbostic 	}
42440087Sbostic }
425