xref: /csrg-svn/lib/libc/gen/glob.c (revision 48357)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)glob.c	5.7 (Berkeley) 04/19/91";
13 #endif /* LIBC_SCCS and not lint */
14 
15 /*
16  * Glob: the interface is a superset of the one defined in POSIX 1003.2,
17  * draft 9.
18  *
19  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
20  *
21  * Optional extra services, controlled by flags not defined by POSIX:
22  *
23  * GLOB_QUOTE:
24  *	Escaping convention: \ inhibits any special meaning the following
25  *	character might have (except \ at end of string is retained).
26  * GLOB_MAGCHAR:
27  *	Set in gl_flags is pattern contained a globbing character.
28  * gl_matchc:
29  *	Number of matches in the current invocation of glob.
30  */
31 
32 #include <sys/param.h>
33 #include <sys/stat.h>
34 #include <dirent.h>
35 #include <glob.h>
36 #include <ctype.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 
42 typedef int bool_t;
43 
44 static glob1();
45 static glob2();
46 static glob3();
47 static globextend();
48 static bool_t match();
49 
50 #define	DOLLAR		'$'
51 #define	DOT		'.'
52 #define	EOS		'\0'
53 #define	LBRACKET	'['
54 #define	NOT		'!'
55 #define	QUESTION	'?'
56 #define	QUOTE		'\\'
57 #define	RANGE		'-'
58 #define	RBRACKET	']'
59 #define	SEP		'/'
60 #define	STAR		'*'
61 #define	TILDE		'~'
62 #define	UNDERSCORE	'_'
63 
64 #define	METABIT		0x80
65 #define	META(c)		((c)|METABIT)
66 #define	M_ALL		META('*')
67 #define	M_END		META(']')
68 #define	M_NOT		META('!')
69 #define	M_ONE		META('?')
70 #define	M_RNG		META('-')
71 #define	M_SET		META('[')
72 #define	ismeta(c)	(((c)&METABIT) != 0)
73 
74 static
75 compare(p, q)
76 	void **p, **q;
77 {
78 	return(strcmp(*(char **)p, *(char **)q));
79 }
80 
81 
82 /*
83  * The main glob() routine: compiles the pattern (optionally processing
84  * quotes), calls glob1() to do the real pattern matching, and finally
85  * sorts the list (unless unsorted operation is requested).  Returns 0
86  * if things went well, nonzero if errors occurred.  It is not an error
87  * to find no matches.
88  */
89 glob(pattern, flags, errfunc, pglob)
90 	const char *pattern;
91 	int flags;
92 	int (*errfunc) __P((char *, int));
93 	glob_t *pglob;
94 {
95 	int err, oldpathc;
96 	char *bufnext, *bufend, *compilebuf;
97 	const char *compilepat, *patnext;
98 	char c, patbuf[MAXPATHLEN+1];
99 
100 	patnext = pattern;
101 	if (!(flags & GLOB_APPEND)) {
102 		pglob->gl_pathc = 0;
103 		pglob->gl_pathv = NULL;
104 		if (!(flags & GLOB_DOOFFS))
105 			pglob->gl_offs = 0;
106 	}
107 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
108 	pglob->gl_errfunc = errfunc;
109 	oldpathc = pglob->gl_pathc;
110 	pglob->gl_matchc = 0;
111 
112 	bufnext = patbuf;
113 	bufend = bufnext+MAXPATHLEN;
114 
115 	compilebuf = bufnext;
116 	compilepat = patnext;
117 	while (bufnext < bufend && (c = *patnext++) != EOS) {
118 		switch (c) {
119 		case LBRACKET:
120 			pglob->gl_flags |= GLOB_MAGCHAR;
121 			c = *patnext;
122 			if (c == NOT)
123 				++patnext;
124 			if (*patnext == EOS ||
125 			    strchr(patnext+1, RBRACKET) == NULL) {
126 				*bufnext++ = LBRACKET;
127 				if (c == NOT)
128 					--patnext;
129 				break;
130 			}
131 			*bufnext++ = M_SET;
132 			if (c == NOT)
133 				*bufnext++ = M_NOT;
134 			c = *patnext++;
135 			do {
136 				/* todo: quoting */
137 				*bufnext++ = c;
138 				if (*patnext == RANGE &&
139 				    (c = patnext[1]) != RBRACKET) {
140 					*bufnext++ = M_RNG;
141 					*bufnext++ = c;
142 					patnext += 2;
143 				}
144 			} while ((c = *patnext++) != RBRACKET);
145 			*bufnext++ = M_END;
146 			break;
147 		case QUESTION:
148 			pglob->gl_flags |= GLOB_MAGCHAR;
149 			*bufnext++ = M_ONE;
150 			break;
151 		case QUOTE:
152 			if (!(flags & GLOB_QUOTE))
153 				*bufnext++ = QUOTE;
154 			else {
155 				if ((c = *patnext++) == EOS) {
156 					c = QUOTE;
157 					--patnext;
158 				}
159 				*bufnext++ = c;
160 			}
161 			break;
162 		case STAR:
163 			pglob->gl_flags |= GLOB_MAGCHAR;
164 			*bufnext++ = M_ALL;
165 			break;
166 		default:
167 			*bufnext++ = c;
168 			break;
169 		}
170 	}
171 	*bufnext = EOS;
172 
173 	if ((err = glob1(patbuf, pglob)) != 0)
174 		return(err);
175 
176 	if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
177 		if (!(flags & GLOB_QUOTE))
178 			(void)strcpy(compilebuf, compilepat);
179 		else {
180 			/*
181 			 * copy pattern, interpreting quotes; this is slightly
182 			 * different than the interpretation of quotes above
183 			 * -- which should prevail?
184 			 */
185 			while (*compilepat != EOS) {
186 				if (*compilepat == QUOTE) {
187 					if (*++compilepat == EOS)
188 						--compilepat;
189 				}
190 				*compilebuf++ = *compilepat++;
191 			}
192 			*compilebuf = EOS;
193 		}
194 		return(globextend(patbuf, pglob));
195 	} else if (!(flags & GLOB_NOSORT))
196 		qsort((char*) (pglob->gl_pathv + pglob->gl_offs + oldpathc),
197 		    pglob->gl_pathc - oldpathc, sizeof(char*), compare);
198 	return(0);
199 }
200 
201 static
202 glob1(pattern, pglob)
203 	char *pattern;
204 	glob_t *pglob;
205 {
206 	char pathbuf[MAXPATHLEN+1];
207 
208 	/*
209 	 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
210 	if (*pattern == EOS)
211 		return(0);
212 	return(glob2(pathbuf, pathbuf, pattern, pglob));
213 }
214 
215 /*
216  * functions glob2 and glob3 are mutually recursive; there is one level
217  * of recursion for each segment in the pattern that contains one or
218  * more meta characters.
219  */
220 static
221 glob2(pathbuf, pathend, pattern, pglob)
222 	char *pathbuf, *pathend, *pattern;
223 	glob_t *pglob;
224 {
225 	struct stat sb;
226 	bool_t anymeta;
227 	char *p, *q;
228 
229 	/*
230 	 * loop over pattern segments until end of pattern or until
231 	 * segment with meta character found.
232 	 */
233 	for (anymeta = 0;;) {
234 		if (*pattern == EOS) {		/* end of pattern? */
235 			*pathend = EOS;
236 			if (lstat(pathbuf, &sb))
237 				return(0);
238 			if (pglob->gl_flags & GLOB_MARK && pathend[-1] != SEP &&
239 			    (S_ISDIR(sb.st_mode) || S_ISLNK(sb.st_mode) &&
240 			    !stat(pathbuf, &sb) && S_ISDIR(sb.st_mode))) {
241 				*pathend++ = SEP;
242 				*pathend = EOS;
243 			}
244 			++pglob->gl_matchc;
245 			return(globextend(pathbuf, pglob));
246 		}
247 
248 		/* find end of next segment, copy tentatively to pathend */
249 		q = pathend;
250 		p = pattern;
251 		while (*p != EOS && *p != SEP) {
252 			if (ismeta(*p))
253 				anymeta = 1;
254 			*q++ = *p++;
255 		}
256 
257 		if (!anymeta) {		/* no expansion, do next segment */
258 			pathend = q;
259 			pattern = p;
260 			while (*pattern == SEP)
261 				*pathend++ = *pattern++;
262 		} else			/* need expansion, recurse */
263 			return(glob3(pathbuf, pathend, pattern, p, pglob));
264 	}
265 	/* NOTREACHED */
266 }
267 
268 static
269 glob3(pathbuf, pathend, pattern, restpattern, pglob)
270 	char *pathbuf, *pathend, *pattern, *restpattern;
271 	glob_t *pglob;
272 {
273 	extern int errno;
274 	DIR *dirp;
275 	struct dirent *dp;
276 	int len, err;
277 
278 	*pathend = EOS;
279 	errno = 0;
280 	if (!(dirp = opendir(pathbuf)))
281 		/* todo: don't call for ENOENT or ENOTDIR? */
282 		if (pglob->gl_errfunc &&
283 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
284 		    (pglob->gl_flags & GLOB_ERR))
285 			return(GLOB_ABEND);
286 		else
287 			return(0);
288 
289 	err = 0;
290 
291 	/* search directory for matching names */
292 	while ((dp = readdir(dirp))) {
293 		/* initial DOT must be matched literally */
294 		if (dp->d_name[0] == DOT && *pattern != DOT)
295 			continue;
296 		if (!match(dp->d_name, pattern, restpattern))
297 			continue;
298 		len = dp->d_namlen;
299 		(void)strcpy(pathend, dp->d_name);
300 		err = glob2(pathbuf, pathend+len, restpattern, pglob);
301 		if (err)
302 			break;
303 	}
304 	/* todo: check error from readdir? */
305 	(void)closedir(dirp);
306 	return(err);
307 }
308 
309 
310 /*
311  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
312  * add the new item, and update gl_pathc.
313  *
314  * This assumes the BSD realloc, which only copies the block when its size
315  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
316  * behavior.
317  *
318  * Return 0 if new item added, error code if memory couldn't be allocated.
319  *
320  * Invariant of the glob_t structure:
321  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
322  *	 gl_pathv points to (gl_offs + gl_pathc + 1) items.
323  */
324 static
325 globextend(path, pglob)
326 	char *path;
327 	glob_t *pglob;
328 {
329 	register char **pathv;
330 	register int i;
331 	u_int copysize, newsize;
332 	char *copy;
333 
334 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
335 	pathv = (char **)realloc((char *)(pathv = pglob->gl_pathv), newsize);
336 	if (pathv == NULL)
337 		return(GLOB_NOSPACE);
338 
339 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
340 		/* first time around -- clear initial gl_offs items */
341 		pathv += pglob->gl_offs;
342 		for (i = pglob->gl_offs; --i >= 0; )
343 			*--pathv = NULL;
344 	}
345 	pglob->gl_pathv = pathv;
346 
347 	copysize = strlen(path) + 1;
348 	if ((copy = malloc(copysize)) != NULL) {
349 		(void)strcpy(copy, path);
350 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
351 	}
352 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
353 	return((copy == NULL) ? GLOB_NOSPACE : 0);
354 }
355 
356 
357 /*
358  * pattern matching function for filenames.  Each occurrence of the *
359  * pattern causes a recursion level.
360  */
361 static bool_t
362 match(name, pat, patend)
363 	register char *name, *pat, *patend;
364 {
365 	bool_t ok, negate_range;
366 	char c, k;
367 
368 	while (pat < patend) {
369 		c = *pat++;
370 		switch (c & 0xff) {
371 		case M_ALL:
372 			if (pat == patend)
373 				return(1);
374 			for (; *name != EOS; ++name) {
375 				if (match(name, pat, patend))
376 					return(1);
377 			}
378 			return(0);
379 		case M_ONE:
380 			if (*name++ == EOS)
381 				return(0);
382 			break;
383 		case M_SET:
384 			ok = 0;
385 			k = *name++;
386 			if (negate_range = (*pat & 0xff) == M_NOT)
387 				++pat;
388 			while (((c = *pat++) & 0xff) != M_END) {
389 				if ((*pat & 0xff) == M_RNG) {
390 					if (c <= k && k <= pat[1])
391 						ok = 1;
392 					pat += 2;
393 				}
394 				else if (c == k)
395 					ok = 1;
396 			}
397 			if (ok == negate_range)
398 				return(0);
399 			break;
400 		default:
401 			if (*name++ != c)
402 				return(0);
403 			break;
404 		}
405 	}
406 	return(*name == EOS);
407 }
408 
409 /* free allocated data belonging to a glob_t structure */
410 void
411 globfree(pglob)
412 	glob_t *pglob;
413 {
414 	register int i;
415 	register char **pp;
416 
417 	if (pglob->gl_pathv != NULL) {
418 		pp = pglob->gl_pathv + pglob->gl_offs;
419 		for (i = pglob->gl_pathc; i--; ++pp)
420 			if (*pp)
421 				free((void *)*pp);
422 		free((void *)pglob->gl_pathv);
423 	}
424 }
425