xref: /csrg-svn/lib/libc/gen/glob.c (revision 40087)
1*40087Sbostic /*
2*40087Sbostic  * Copyright (c) 1989 The Regents of the University of California.
3*40087Sbostic  * All rights reserved.
4*40087Sbostic  *
5*40087Sbostic  * This code is derived from software contributed to Berkeley by
6*40087Sbostic  * Guido van Rossum.
7*40087Sbostic  *
8*40087Sbostic  * Redistribution and use in source and binary forms are permitted
9*40087Sbostic  * provided that the above copyright notice and this paragraph are
10*40087Sbostic  * duplicated in all such forms and that any documentation,
11*40087Sbostic  * advertising materials, and other materials related to such
12*40087Sbostic  * distribution and use acknowledge that the software was developed
13*40087Sbostic  * by the University of California, Berkeley.  The name of the
14*40087Sbostic  * University may not be used to endorse or promote products derived
15*40087Sbostic  * from this software without specific prior written permission.
16*40087Sbostic  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
17*40087Sbostic  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
18*40087Sbostic  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
19*40087Sbostic  */
20*40087Sbostic 
21*40087Sbostic #if defined(LIBC_SCCS) && !defined(lint)
22*40087Sbostic static char sccsid[] = "@(#)glob.c	5.1 (Berkeley) 02/15/90";
23*40087Sbostic #endif /* LIBC_SCCS and not lint */
24*40087Sbostic 
25*40087Sbostic /*
26*40087Sbostic  * Glob: the interface is a superset of the one defined in POSIX 1003.2,
27*40087Sbostic  * draft 9.
28*40087Sbostic  *
29*40087Sbostic  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
30*40087Sbostic  *
31*40087Sbostic  * Optional extra services, controlled by flags not defined by POSIX:
32*40087Sbostic  *	GLOB_QUOTE: escaping convention: \ inhibits any special meaning
33*40087Sbostic 		the following character might have (except \ at end of
34*40087Sbostic  *		string is kept);
35*40087Sbostic  */
36*40087Sbostic 
37*40087Sbostic #include <sys/param.h>
38*40087Sbostic #include <sys/stat.h>
39*40087Sbostic #include <dirent.h>
40*40087Sbostic #include <glob.h>
41*40087Sbostic #include <ctype.h>
42*40087Sbostic #include <errno.h>
43*40087Sbostic #include <string.h>
44*40087Sbostic #include <stdio.h>
45*40087Sbostic 
46*40087Sbostic char *malloc(), *realloc();
47*40087Sbostic 
48*40087Sbostic typedef int bool_t;
49*40087Sbostic 
50*40087Sbostic #define	DOLLAR		'$'
51*40087Sbostic #define	DOT		'.'
52*40087Sbostic #define	EOS		'\0'
53*40087Sbostic #define	LBRACKET	'['
54*40087Sbostic #define	NOT		'!'
55*40087Sbostic #define	QUESTION	'?'
56*40087Sbostic #define	QUOTE		'\\'
57*40087Sbostic #define	RANGE		'-'
58*40087Sbostic #define	RBRACKET	']'
59*40087Sbostic #define	SEP		'/'
60*40087Sbostic #define	STAR		'*'
61*40087Sbostic #define	TILDE		'~'
62*40087Sbostic #define	UNDERSCORE	'_'
63*40087Sbostic 
64*40087Sbostic #define	METABIT		0x80
65*40087Sbostic #define	META(c)		((c)|METABIT)
66*40087Sbostic #define	M_ALL		META('*')
67*40087Sbostic #define	M_END		META(']')
68*40087Sbostic #define	M_NOT		META('!')
69*40087Sbostic #define	M_ONE		META('?')
70*40087Sbostic #define	M_RNG		META('-')
71*40087Sbostic #define	M_SET		META('[')
72*40087Sbostic #define	ismeta(c)	(((c)&METABIT) != 0)
73*40087Sbostic 
74*40087Sbostic static
75*40087Sbostic compare(p, q)
76*40087Sbostic 	char **p, **q;
77*40087Sbostic {
78*40087Sbostic 	return(strcmp(*p, *q));
79*40087Sbostic }
80*40087Sbostic 
81*40087Sbostic 
82*40087Sbostic /*
83*40087Sbostic  * The main glob() routine: compiles the pattern (optionally processing
84*40087Sbostic  * quotes), calls glob1() to do the real pattern matching, and finally
85*40087Sbostic  * sorts the list (unless unsorted operation is requested).  Returns 0
86*40087Sbostic  * if things went well, nonzero if errors occurred.  It is not an error
87*40087Sbostic  * to find no matches.
88*40087Sbostic  */
89*40087Sbostic glob(pattern, flags, errfunc, pglob)
90*40087Sbostic 	char *pattern;
91*40087Sbostic 	int flags, (*errfunc)();
92*40087Sbostic 	glob_t *pglob;
93*40087Sbostic {
94*40087Sbostic 	int err, oldpathc;
95*40087Sbostic 	char *bufnext, *bufend, *compilebuf, *compilepat, *patnext;
96*40087Sbostic 	char c, patbuf[MAXPATHLEN+1];
97*40087Sbostic 
98*40087Sbostic 	patnext = pattern;
99*40087Sbostic 	if (!(flags & GLOB_APPEND)) {
100*40087Sbostic 		pglob->gl_pathc = 0;
101*40087Sbostic 		pglob->gl_pathv = NULL;
102*40087Sbostic 		if (!(flags & GLOB_DOOFFS))
103*40087Sbostic 			pglob->gl_offs = 0;
104*40087Sbostic 	}
105*40087Sbostic 	pglob->gl_flags = flags;
106*40087Sbostic 	pglob->gl_errfunc = errfunc;
107*40087Sbostic 	oldpathc = pglob->gl_pathc;
108*40087Sbostic 
109*40087Sbostic 	bufnext = patbuf;
110*40087Sbostic 	bufend = bufnext+MAXPATHLEN;
111*40087Sbostic 
112*40087Sbostic 	compilebuf = bufnext;
113*40087Sbostic 	compilepat = patnext;
114*40087Sbostic 	while (bufnext < bufend && (c = *patnext++) != EOS) {
115*40087Sbostic 		switch (c) {
116*40087Sbostic 		case LBRACKET:
117*40087Sbostic 			c = *patnext;
118*40087Sbostic 			if (c == NOT)
119*40087Sbostic 				++patnext;
120*40087Sbostic 			if (*patnext == EOS ||
121*40087Sbostic 			    strchr(patnext+1, RBRACKET) == NULL) {
122*40087Sbostic 				*bufnext++ = LBRACKET;
123*40087Sbostic 				if (c == NOT)
124*40087Sbostic 					--patnext;
125*40087Sbostic 				break;
126*40087Sbostic 			}
127*40087Sbostic 			*bufnext++ = M_SET;
128*40087Sbostic 			if (c == NOT)
129*40087Sbostic 				*bufnext++ = M_NOT;
130*40087Sbostic 			c = *patnext++;
131*40087Sbostic 			do {
132*40087Sbostic 				/* todo: quoting */
133*40087Sbostic 				*bufnext++ = c;
134*40087Sbostic 				if (*patnext == RANGE &&
135*40087Sbostic 				    (c = patnext[1]) != RBRACKET) {
136*40087Sbostic 					*bufnext++ = M_RNG;
137*40087Sbostic 					*bufnext++ = c;
138*40087Sbostic 					patnext += 2;
139*40087Sbostic 				}
140*40087Sbostic 			} while ((c = *patnext++) != RBRACKET);
141*40087Sbostic 			*bufnext++ = M_END;
142*40087Sbostic 			break;
143*40087Sbostic 		case QUESTION:
144*40087Sbostic 			*bufnext++ = M_ONE;
145*40087Sbostic 			break;
146*40087Sbostic 		case QUOTE:
147*40087Sbostic 			if (!(flags & GLOB_QUOTE))
148*40087Sbostic 				*bufnext++ = QUOTE;
149*40087Sbostic 			else {
150*40087Sbostic 				if ((c = *patnext++) == EOS) {
151*40087Sbostic 					c = QUOTE;
152*40087Sbostic 					--patnext;
153*40087Sbostic 				}
154*40087Sbostic 				*bufnext++ = c;
155*40087Sbostic 			}
156*40087Sbostic 			break;
157*40087Sbostic 		case STAR:
158*40087Sbostic 			*bufnext++ = M_ALL;
159*40087Sbostic 			break;
160*40087Sbostic 		default:
161*40087Sbostic 			*bufnext++ = c;
162*40087Sbostic 			break;
163*40087Sbostic 		}
164*40087Sbostic 	}
165*40087Sbostic 	*bufnext = EOS;
166*40087Sbostic 
167*40087Sbostic 	if ((err = glob1(patbuf, pglob)) != 0)
168*40087Sbostic 		return(err);
169*40087Sbostic 
170*40087Sbostic 	if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
171*40087Sbostic 		if (!(flags & GLOB_QUOTE))
172*40087Sbostic 			(void)strcpy(compilebuf, compilepat);
173*40087Sbostic 		else {
174*40087Sbostic 			/*
175*40087Sbostic 			 * copy pattern, interpreting quotes; this is slightly
176*40087Sbostic 			 * different than the interpretation of quotes above
177*40087Sbostic 			 * -- which should prevail?
178*40087Sbostic 			 */
179*40087Sbostic 			while (*compilepat != EOS) {
180*40087Sbostic 				if (*compilepat == QUOTE) {
181*40087Sbostic 					if (*++compilepat == EOS)
182*40087Sbostic 						--compilepat;
183*40087Sbostic 				}
184*40087Sbostic 				*compilebuf++ = *compilepat++;
185*40087Sbostic 			}
186*40087Sbostic 			*compilebuf = EOS;
187*40087Sbostic 		}
188*40087Sbostic 		return(globextend(patbuf, pglob));
189*40087Sbostic 	} else if (!(flags & GLOB_NOSORT))
190*40087Sbostic 		qsort((char*) (pglob->gl_pathv + pglob->gl_offs + oldpathc),
191*40087Sbostic 		    pglob->gl_pathc - oldpathc, sizeof(char*), compare);
192*40087Sbostic 	return(0);
193*40087Sbostic }
194*40087Sbostic 
195*40087Sbostic static
196*40087Sbostic glob1(pattern, pglob)
197*40087Sbostic 	char *pattern;
198*40087Sbostic 	glob_t *pglob;
199*40087Sbostic {
200*40087Sbostic 	char pathbuf[MAXPATHLEN+1];
201*40087Sbostic 
202*40087Sbostic 	/*
203*40087Sbostic 	 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
204*40087Sbostic 	if (*pattern == EOS)
205*40087Sbostic 		return(0);
206*40087Sbostic 	return(glob2(pathbuf, pathbuf, pattern, pglob));
207*40087Sbostic }
208*40087Sbostic 
209*40087Sbostic /*
210*40087Sbostic  * functions glob2 and glob3 are mutually recursive; there is one level
211*40087Sbostic  * of recursion for each segment in the pattern that contains one or
212*40087Sbostic  * more meta characters.
213*40087Sbostic  */
214*40087Sbostic static
215*40087Sbostic glob2(pathbuf, pathend, pattern, pglob)
216*40087Sbostic 	char *pathbuf, *pathend, *pattern;
217*40087Sbostic 	glob_t *pglob;
218*40087Sbostic {
219*40087Sbostic 	struct stat sbuf;
220*40087Sbostic 	bool_t anymeta = 0;
221*40087Sbostic 	char *p, *q;
222*40087Sbostic 
223*40087Sbostic 	/*
224*40087Sbostic 	 * loop over pattern segments until end of pattern or until
225*40087Sbostic 	 * segment with meta character found.
226*40087Sbostic 	 */
227*40087Sbostic 	for (;;) {
228*40087Sbostic 		if (*pattern == EOS) {		/* end of pattern? */
229*40087Sbostic 			*pathend = EOS;
230*40087Sbostic 			if (stat(pathbuf, &sbuf) != 0)
231*40087Sbostic 				return(0);	/* need error call here? */
232*40087Sbostic 			if ((pglob->gl_flags & GLOB_MARK) &&
233*40087Sbostic 			    pathend[-1] != SEP && S_ISDIR(sbuf.st_mode)) {
234*40087Sbostic 				*pathend++ = SEP;
235*40087Sbostic 				*pathend = EOS;
236*40087Sbostic 			}
237*40087Sbostic 			return(globextend(pathbuf, pglob));
238*40087Sbostic 		}
239*40087Sbostic 
240*40087Sbostic 		/* find end of next segment, copy tentatively to pathend */
241*40087Sbostic 		q = pathend;
242*40087Sbostic 		p = pattern;
243*40087Sbostic 		while (*p != EOS && *p != SEP) {
244*40087Sbostic 			if (ismeta(*p))
245*40087Sbostic 				anymeta = 1;
246*40087Sbostic 			*q++ = *p++;
247*40087Sbostic 		}
248*40087Sbostic 
249*40087Sbostic 		if (!anymeta) {		/* no expansion, do next segment */
250*40087Sbostic 			pathend = q;
251*40087Sbostic 			pattern = p;
252*40087Sbostic 			while (*pattern == SEP)
253*40087Sbostic 				*pathend++ = *pattern++;
254*40087Sbostic 		} else			/* need expansion, recurse */
255*40087Sbostic 			return(glob3(pathbuf, pathend, pattern, p, pglob));
256*40087Sbostic 	}
257*40087Sbostic 	/* NOTREACHED */
258*40087Sbostic }
259*40087Sbostic 
260*40087Sbostic static
261*40087Sbostic glob3(pathbuf, pathend, pattern, restpattern, pglob)
262*40087Sbostic 	char *pathbuf, *pathend, *pattern, *restpattern;
263*40087Sbostic 	glob_t *pglob;
264*40087Sbostic {
265*40087Sbostic 	extern int errno;
266*40087Sbostic 	DIR *dirp;
267*40087Sbostic 	struct dirent *dp;
268*40087Sbostic 	int len, err;
269*40087Sbostic 
270*40087Sbostic 	*pathend = EOS;
271*40087Sbostic 	errno = 0;
272*40087Sbostic 	if (!(dirp = opendir(pathbuf)))
273*40087Sbostic 		/* todo: don't call for ENOENT or ENOTDIR? */
274*40087Sbostic 		if (pglob->gl_errfunc &&
275*40087Sbostic 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
276*40087Sbostic 		    (pglob->gl_flags & GLOB_ERR))
277*40087Sbostic 			return(GLOB_ABEND);
278*40087Sbostic 		else
279*40087Sbostic 			return(0);
280*40087Sbostic 
281*40087Sbostic 	err = 0;
282*40087Sbostic 
283*40087Sbostic 	/* search directory for matching names */
284*40087Sbostic 	while ((dp = readdir(dirp))) {
285*40087Sbostic 		/* initial DOT must be matched literally */
286*40087Sbostic 		if (dp->d_name[0] == DOT && *pattern != DOT)
287*40087Sbostic 			continue;
288*40087Sbostic 		if (!match(dp->d_name, pattern, restpattern))
289*40087Sbostic 			continue;
290*40087Sbostic 		len = dp->d_namlen;
291*40087Sbostic 		(void)strcpy(pathend, dp->d_name);
292*40087Sbostic 		err = glob2(pathbuf, pathend+len, restpattern, pglob);
293*40087Sbostic 		if (err)
294*40087Sbostic 			break;
295*40087Sbostic 	}
296*40087Sbostic 	/* todo: check error from readdir? */
297*40087Sbostic 	(void)closedir(dirp);
298*40087Sbostic 	return(err);
299*40087Sbostic }
300*40087Sbostic 
301*40087Sbostic 
302*40087Sbostic /*
303*40087Sbostic  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
304*40087Sbostic  * add the new item, and update gl_pathc.
305*40087Sbostic  *
306*40087Sbostic  * This assumes the BSD realloc, which only copies the block when its size
307*40087Sbostic  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
308*40087Sbostic  * behavior.
309*40087Sbostic  *
310*40087Sbostic  * Return 0 if new item added, error code if memory couldn't be allocated.
311*40087Sbostic  *
312*40087Sbostic  * Invariant of the glob_t structure:
313*40087Sbostic  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
314*40087Sbostic  *	 gl_pathv points to (gl_offs + gl_pathc + 1) items.
315*40087Sbostic  */
316*40087Sbostic static
317*40087Sbostic globextend(path, pglob)
318*40087Sbostic 	char *path;
319*40087Sbostic 	glob_t *pglob;
320*40087Sbostic {
321*40087Sbostic 	register char **pathv;
322*40087Sbostic 	register int i;
323*40087Sbostic 	u_int copysize, newsize;
324*40087Sbostic 	char *copy;
325*40087Sbostic 
326*40087Sbostic 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
327*40087Sbostic 	pathv = (char **)realloc((char *)(pathv = pglob->gl_pathv), newsize);
328*40087Sbostic 	if (pathv == NULL)
329*40087Sbostic 		return(GLOB_NOSPACE);
330*40087Sbostic 
331*40087Sbostic 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
332*40087Sbostic 		/* first time around -- clear initial gl_offs items */
333*40087Sbostic 		pathv += pglob->gl_offs;
334*40087Sbostic 		for (i = pglob->gl_offs; --i >= 0; )
335*40087Sbostic 			*--pathv = NULL;
336*40087Sbostic 	}
337*40087Sbostic 	pglob->gl_pathv = pathv;
338*40087Sbostic 
339*40087Sbostic 	copysize = strlen(path) + 1;
340*40087Sbostic 	if ((copy = malloc(copysize)) != NULL) {
341*40087Sbostic 		(void)strcpy(copy, path);
342*40087Sbostic 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
343*40087Sbostic 	}
344*40087Sbostic 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
345*40087Sbostic 	return((copy == NULL) ? GLOB_NOSPACE : 0);
346*40087Sbostic }
347*40087Sbostic 
348*40087Sbostic 
349*40087Sbostic /*
350*40087Sbostic  * pattern matching function for filenames.  Each occurrence of the *
351*40087Sbostic  * pattern causes a recursion level.
352*40087Sbostic  */
353*40087Sbostic static bool_t
354*40087Sbostic match(name, pat, patend)
355*40087Sbostic 	register char *name, *pat, *patend;
356*40087Sbostic {
357*40087Sbostic 	bool_t ok, negate_range;
358*40087Sbostic 	char c, k;
359*40087Sbostic 
360*40087Sbostic 	while (pat < patend) {
361*40087Sbostic 		c = *pat++;
362*40087Sbostic 		switch (c & 0xff) {
363*40087Sbostic 		case M_ALL:
364*40087Sbostic 			if (pat == patend)
365*40087Sbostic 				return(1);
366*40087Sbostic 			for (; *name != EOS; ++name) {
367*40087Sbostic 				if (match(name, pat, patend))
368*40087Sbostic 					return(1);
369*40087Sbostic 			}
370*40087Sbostic 			return(0);
371*40087Sbostic 		case M_ONE:
372*40087Sbostic 			if (*name++ == EOS)
373*40087Sbostic 				return(0);
374*40087Sbostic 			break;
375*40087Sbostic 		case M_SET:
376*40087Sbostic 			ok = 0;
377*40087Sbostic 			k = *name++;
378*40087Sbostic 			if (negate_range = (*pat & 0xff) == M_NOT)
379*40087Sbostic 				++pat;
380*40087Sbostic 			while (((c = *pat++) & 0xff) != M_END) {
381*40087Sbostic 				if ((*pat & 0xff) == M_RNG) {
382*40087Sbostic 					if (c <= k && k <= pat[1])
383*40087Sbostic 						ok = 1;
384*40087Sbostic 					pat += 2;
385*40087Sbostic 				}
386*40087Sbostic 				else if (c == k)
387*40087Sbostic 					ok = 1;
388*40087Sbostic 			}
389*40087Sbostic 			if (ok == negate_range)
390*40087Sbostic 				return(0);
391*40087Sbostic 			break;
392*40087Sbostic 		default:
393*40087Sbostic 			if (*name++ != c)
394*40087Sbostic 				return(0);
395*40087Sbostic 			break;
396*40087Sbostic 		}
397*40087Sbostic 	}
398*40087Sbostic 	return(*name == EOS);
399*40087Sbostic }
400*40087Sbostic 
401*40087Sbostic /* free allocated data belonging to a glob_t structure */
402*40087Sbostic void
403*40087Sbostic globfree(pglob)
404*40087Sbostic 	glob_t *pglob;
405*40087Sbostic {
406*40087Sbostic 	register int i;
407*40087Sbostic 	register char **pp;
408*40087Sbostic 
409*40087Sbostic 	if (pglob->gl_pathv != NULL) {
410*40087Sbostic 		pp = pglob->gl_pathv + pglob->gl_offs;
411*40087Sbostic 		for (i = pglob->gl_pathc; i--; ++pp)
412*40087Sbostic 			if (*pp)
413*40087Sbostic 				(void)free(*pp);
414*40087Sbostic 		(void)free((char *)pp);
415*40087Sbostic 	}
416*40087Sbostic }
417