xref: /csrg-svn/lib/libc/gen/glob.c (revision 50050)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)glob.c	5.10 (Berkeley) 06/17/91";
13 #endif /* LIBC_SCCS and not lint */
14 
15 /*
16  * glob(3) -- a superset of the one defined in POSIX 1003.2.
17  *
18  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
19  *
20  * Optional extra services, controlled by flags not defined by POSIX:
21  *
22  * GLOB_QUOTE:
23  *	Escaping convention: \ inhibits any special meaning the following
24  *	character might have (except \ at end of string is retained).
25  * GLOB_MAGCHAR:
26  *	Set in gl_flags if pattern contained a globbing character.
27  * gl_matchc:
28  *	Number of matches in the current invocation of glob.
29  */
30 
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/stat.h>
34 #include <dirent.h>
35 #include <glob.h>
36 #include <ctype.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 
42 #define	DOLLAR		'$'
43 #define	DOT		'.'
44 #define	EOS		'\0'
45 #define	LBRACKET	'['
46 #define	NOT		'!'
47 #define	QUESTION	'?'
48 #define	QUOTE		'\\'
49 #define	RANGE		'-'
50 #define	RBRACKET	']'
51 #define	SEP		'/'
52 #define	STAR		'*'
53 #define	TILDE		'~'
54 #define	UNDERSCORE	'_'
55 
56 #define	M_QUOTE		0x8000
57 #define	M_PROTECT	0x4000
58 #define	M_MASK		0xffff
59 
60 #define	META(c)		((c)|M_QUOTE)
61 #define	M_ALL		META('*')
62 #define	M_END		META(']')
63 #define	M_NOT		META('!')
64 #define	M_ONE		META('?')
65 #define	M_RNG		META('-')
66 #define	M_SET		META('[')
67 #define	ismeta(c)	(((c)&M_QUOTE) != 0)
68 
69 typedef u_short Char;
70 
71 static int	 compare __P((const void *, const void *));
72 static void	 g_Ctoc __P((Char *, char *));
73 static int	 g_lstat __P((Char *, struct stat *));
74 static DIR	*g_opendir __P((Char *));
75 static Char	*g_strchr __P((Char *, int));
76 static int	 g_stat __P((Char *, struct stat *));
77 static int	 glob1 __P((Char *, glob_t *));
78 static int	 glob2 __P((Char *, Char *, Char *, glob_t *));
79 static int	 glob3 __P((Char *, Char *, Char *, Char *, glob_t *));
80 static int	 globextend __P((Char *, glob_t *));
81 static int	 match __P((Char *, Char *, Char *));
82 #ifdef DEBUG
83 static void	 qprintf __P((Char *));
84 #endif
85 
86 /*
87  * The main glob() routine: compiles the pattern (optionally processing
88  * quotes), calls glob1() to do the real pattern matching, and finally
89  * sorts the list (unless unsorted operation is requested).  Returns 0
90  * if things went well, nonzero if errors occurred.  It is not an error
91  * to find no matches.
92  */
93 glob(pattern, flags, errfunc, pglob)
94 	const char *pattern;
95 	int flags, (*errfunc) __P((char *, int));
96 	glob_t *pglob;
97 {
98 	const char *compilepat, *patnext;
99 	int err, oldpathc;
100 	Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1];
101 	char c;
102 
103 	patnext = pattern;
104 	if (!(flags & GLOB_APPEND)) {
105 		pglob->gl_pathc = 0;
106 		pglob->gl_pathv = NULL;
107 		if (!(flags & GLOB_DOOFFS))
108 			pglob->gl_offs = 0;
109 	}
110 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
111 	pglob->gl_errfunc = errfunc;
112 	oldpathc = pglob->gl_pathc;
113 	pglob->gl_matchc = 0;
114 
115 	bufnext = patbuf;
116 	bufend = bufnext + MAXPATHLEN;
117 	compilebuf = bufnext;
118 	compilepat = patnext;
119 	if (flags & GLOB_QUOTE) {
120 		/* Protect the quoted characters. */
121 		while (bufnext < bufend && (c = *patnext++) != EOS)
122 			if (c == QUOTE) {
123 				if ((c = *patnext++) == EOS) {
124 					c = QUOTE;
125 					--patnext;
126 				}
127 				*bufnext++ = c | M_PROTECT;
128 			}
129 			else
130 				*bufnext++ = c;
131 	}
132 	else
133 	    while (bufnext < bufend && (c = *patnext++) != EOS)
134 		    *bufnext++ = c;
135 	*bufnext = EOS;
136 
137 	bufnext = patbuf;
138 	qpatnext = patbuf;
139 	/* We don't need to check for buffer overflow any more. */
140 	while ((c = *qpatnext++) != EOS) {
141 		switch (c) {
142 		case LBRACKET:
143 			pglob->gl_flags |= GLOB_MAGCHAR;
144 			c = *qpatnext;
145 			if (c == NOT)
146 				++qpatnext;
147 			if (*qpatnext == EOS ||
148 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
149 				*bufnext++ = LBRACKET;
150 				if (c == NOT)
151 					--qpatnext;
152 				break;
153 			}
154 			*bufnext++ = M_SET;
155 			if (c == NOT)
156 				*bufnext++ = M_NOT;
157 			c = *qpatnext++;
158 			do {
159 				*bufnext++ = c;
160 				if (*qpatnext == RANGE &&
161 				    (c = qpatnext[1]) != RBRACKET) {
162 					*bufnext++ = M_RNG;
163 					*bufnext++ = c;
164 					qpatnext += 2;
165 				}
166 			} while ((c = *qpatnext++) != RBRACKET);
167 			*bufnext++ = M_END;
168 			break;
169 		case QUESTION:
170 			pglob->gl_flags |= GLOB_MAGCHAR;
171 			*bufnext++ = M_ONE;
172 			break;
173 		case STAR:
174 			pglob->gl_flags |= GLOB_MAGCHAR;
175 			*bufnext++ = M_ALL;
176 			break;
177 		default:
178 			*bufnext++ = c;
179 			break;
180 		}
181 	}
182 	*bufnext = EOS;
183 #ifdef DEBUG
184 	qprintf(patbuf);
185 #endif
186 
187 	if ((err = glob1(patbuf, pglob)) != 0)
188 		return(err);
189 
190 	if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
191 		if (!(flags & GLOB_QUOTE)) {
192 			Char *dp = compilebuf;
193 			const char *sp = compilepat;
194 			while (*dp++ = (u_char)*sp++);
195 		}
196 		else {
197 			/*
198 			 * Copy pattern, interpreting quotes; this is slightly
199 			 * different than the interpretation of quotes above
200 			 * -- which should prevail?
201 			 */
202 			while (*compilepat != EOS) {
203 				if (*compilepat == QUOTE) {
204 					if (*++compilepat == EOS)
205 						--compilepat;
206 				}
207 				*compilebuf++ = (u_char)*compilepat++;
208 			}
209 			*compilebuf = EOS;
210 		}
211 		return(globextend(patbuf, pglob));
212 	} else if (!(flags & GLOB_NOSORT))
213 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
214 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
215 	return(0);
216 }
217 
218 static int
219 compare(p, q)
220 	const void *p, *q;
221 {
222 	return(strcmp(*(char **)p, *(char **)q));
223 }
224 
225 static
226 glob1(pattern, pglob)
227 	Char *pattern;
228 	glob_t *pglob;
229 {
230 	Char pathbuf[MAXPATHLEN+1];
231 
232 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
233 	if (*pattern == EOS)
234 		return(0);
235 	return(glob2(pathbuf, pathbuf, pattern, pglob));
236 }
237 
238 /*
239  * The functions glob2 and glob3 are mutually recursive; there is one level
240  * of recursion for each segment in the pattern that contains one or more
241  * meta characters.
242  */
243 static
244 glob2(pathbuf, pathend, pattern, pglob)
245 	Char *pathbuf, *pathend, *pattern;
246 	glob_t *pglob;
247 {
248 	struct stat sb;
249 	Char *p, *q;
250 	int anymeta;
251 
252 	/*
253 	 * Loop over pattern segments until end of pattern or until
254 	 * segment with meta character found.
255 	 */
256 	for (anymeta = 0;;) {
257 		if (*pattern == EOS) {		/* End of pattern? */
258 			*pathend = EOS;
259 			if (g_stat(pathbuf, &sb))
260 				return(0);
261 
262 			if (((pglob->gl_flags & GLOB_MARK) &&
263 			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
264 			    || (S_ISLNK(sb.st_mode) &&
265 			    (g_stat(pathbuf, &sb) == 0) &&
266 			    S_ISDIR(sb.st_mode)))) {
267 				*pathend++ = SEP;
268 				*pathend = EOS;
269 			}
270 			++pglob->gl_matchc;
271 			return(globextend(pathbuf, pglob));
272 		}
273 
274 		/* Find end of next segment, copy tentatively to pathend. */
275 		q = pathend;
276 		p = pattern;
277 		while (*p != EOS && *p != SEP) {
278 			if (ismeta(*p))
279 				anymeta = 1;
280 			*q++ = *p++;
281 		}
282 
283 		if (!anymeta) {		/* No expansion, do next segment. */
284 			pathend = q;
285 			pattern = p;
286 			while (*pattern == SEP)
287 				*pathend++ = *pattern++;
288 		} else			/* Need expansion, recurse. */
289 			return(glob3(pathbuf, pathend, pattern, p, pglob));
290 	}
291 	/* NOTREACHED */
292 }
293 
294 static
295 glob3(pathbuf, pathend, pattern, restpattern, pglob)
296 	Char *pathbuf, *pathend, *pattern, *restpattern;
297 	glob_t *pglob;
298 {
299 	register struct dirent *dp;
300 	DIR *dirp;
301 	int len, err;
302 
303 	*pathend = EOS;
304 	errno = 0;
305 
306 	if (!(dirp = g_opendir(pathbuf)))
307 		/* TODO: don't call for ENOENT or ENOTDIR? */
308 		if (pglob->gl_errfunc &&
309 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
310 		    (pglob->gl_flags & GLOB_ERR))
311 			return(GLOB_ABEND);
312 		else
313 			return(0);
314 
315 	err = 0;
316 
317 	/* Search directory for matching names. */
318 	while ((dp = readdir(dirp))) {
319 		register char *sc;
320 		register Char *dc;
321 
322 		/* Initial DOT must be matched literally. */
323 		if (dp->d_name[0] == DOT && *pattern != DOT)
324 			continue;
325 		for (sc = dp->d_name, dc = pathend;
326 		     *dc++ = (u_char)*sc++;);
327 		if (!match(pathend, pattern, restpattern)) {
328 			*pathend = EOS;
329 			continue;
330 		}
331 		err = glob2(pathbuf, --dc, restpattern, pglob);
332 		if (err)
333 			break;
334 	}
335 
336 	/* TODO: check error from readdir? */
337 	(void)closedir(dirp);
338 	return(err);
339 }
340 
341 
342 /*
343  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
344  * add the new item, and update gl_pathc.
345  *
346  * This assumes the BSD realloc, which only copies the block when its size
347  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
348  * behavior.
349  *
350  * Return 0 if new item added, error code if memory couldn't be allocated.
351  *
352  * Invariant of the glob_t structure:
353  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
354  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
355  */
356 static int
357 globextend(path, pglob)
358 	Char *path;
359 	glob_t *pglob;
360 {
361 	register char **pathv;
362 	register int i;
363 	u_int newsize;
364 	char *copy;
365 	Char *p;
366 
367 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
368 	pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
369 	if (pathv == NULL)
370 		return(GLOB_NOSPACE);
371 
372 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
373 		/* first time around -- clear initial gl_offs items */
374 		pathv += pglob->gl_offs;
375 		for (i = pglob->gl_offs; --i >= 0; )
376 			*--pathv = NULL;
377 	}
378 	pglob->gl_pathv = pathv;
379 
380 	for (p = path; *p++;);
381 	if ((copy = malloc(p - path)) != NULL) {
382 		g_Ctoc(path, copy);
383 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
384 	}
385 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
386 	return(copy == NULL ? GLOB_NOSPACE : 0);
387 }
388 
389 
390 /*
391  * pattern matching function for filenames.  Each occurrence of the *
392  * pattern causes a recursion level.
393  */
394 static
395 match(name, pat, patend)
396 	register Char *name, *pat, *patend;
397 {
398 	int ok, negate_range;
399 	Char c, k;
400 
401 	while (pat < patend) {
402 		c = *pat++;
403 		switch (c & M_MASK) {
404 		case M_ALL:
405 			if (pat == patend)
406 				return(1);
407 			for (; *name != EOS; ++name)
408 				if (match(name, pat, patend))
409 					return(1);
410 			return(0);
411 		case M_ONE:
412 			if (*name++ == EOS)
413 				return(0);
414 			break;
415 		case M_SET:
416 			ok = 0;
417 			k = *name++;
418 			if (negate_range = ((*pat & M_MASK) == M_NOT))
419 				++pat;
420 			while (((c = *pat++) & M_MASK) != M_END)
421 				if ((*pat & M_MASK) == M_RNG) {
422 					if (c <= k && k <= pat[1])
423 						ok = 1;
424 					pat += 2;
425 				} else if (c == k)
426 					ok = 1;
427 			if (ok == negate_range)
428 				return(0);
429 			break;
430 		default:
431 			if (*name++ != c)
432 				return(0);
433 			break;
434 		}
435 	}
436 	return(*name == EOS);
437 }
438 
439 /* Free allocated data belonging to a glob_t structure. */
440 void
441 globfree(pglob)
442 	glob_t *pglob;
443 {
444 	register int i;
445 	register char **pp;
446 
447 	if (pglob->gl_pathv != NULL) {
448 		pp = pglob->gl_pathv + pglob->gl_offs;
449 		for (i = pglob->gl_pathc; i--; ++pp)
450 			if (*pp)
451 				free(*pp);
452 		free(pglob->gl_pathv);
453 	}
454 }
455 
456 static DIR *
457 g_opendir(str)
458 	register Char *str;
459 {
460 	char buf[MAXPATHLEN];
461 
462 	if (!*str)
463 		return(opendir("."));
464 	g_Ctoc(str, buf);
465 	return(opendir(buf));
466 }
467 
468 static int
469 g_lstat(fn, sb)
470 	register Char *fn;
471 	struct stat *sb;
472 {
473 	char buf[MAXPATHLEN];
474 
475 	g_Ctoc(fn, buf);
476 	return(lstat(buf, sb));
477 }
478 
479 static int
480 g_stat(fn, sb)
481 	register Char *fn;
482 	struct stat *sb;
483 {
484 	char buf[MAXPATHLEN];
485 
486 	g_Ctoc(fn, buf);
487 	return(stat(buf, sb));
488 }
489 
490 static Char *
491 g_strchr(str, ch)
492 	Char *str;
493 	int ch;
494 {
495 	do {
496 		if (*str == ch)
497 			return (str);
498 	} while (*str++);
499 	return (NULL);
500 }
501 
502 static void
503 g_Ctoc(str, buf)
504 	register Char *str;
505 	char *buf;
506 {
507 	register char *dc;
508 
509 	for (dc = buf; *dc++ = *str++;);
510 }
511 
512 #ifdef DEBUG
513 static void
514 qprintf(s)
515 	register Char *s;
516 {
517 	register Char *p;
518 
519 	for (p = s; *p; p++)
520 		(void)printf("%c", *p & 0xff);
521 	(void)printf("\n");
522 	for (p = s; *p; p++)
523 		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
524 	(void)printf("\n");
525 	for (p = s; *p; p++)
526 		(void)printf("%c", *p & M_META ? '_' : ' ');
527 	(void)printf("\n");
528 }
529 #endif
530