xref: /csrg-svn/lib/libc/gen/glob.c (revision 57006)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)glob.c	5.18 (Berkeley) 12/04/92";
13 #endif /* LIBC_SCCS and not lint */
14 
15 /*
16  * glob(3) -- a superset of the one defined in POSIX 1003.2.
17  *
18  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
19  *
20  * Optional extra services, controlled by flags not defined by POSIX:
21  *
22  * GLOB_QUOTE:
23  *	Escaping convention: \ inhibits any special meaning the following
24  *	character might have (except \ at end of string is retained).
25  * GLOB_MAGCHAR:
26  *	Set in gl_flags if pattern contained a globbing character.
27  * GLOB_NOMAGIC:
28  *	Same as GLOB_NOCHECK, but it will only append pattern if it did
29  *	not contain any magic characters.  [Used in csh style globbing]
30  * GLOB_ALTDIRFUNC:
31  *	Use alternately specified directory access functions.
32  * gl_matchc:
33  *	Number of matches in the current invocation of glob.
34  */
35 
36 #include <sys/param.h>
37 #include <sys/stat.h>
38 #include <dirent.h>
39 #include <glob.h>
40 #include <ctype.h>
41 #include <errno.h>
42 #include <string.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 
/* String terminator. */
#define	EOS		'\0'

/* Characters with special meaning inside a glob pattern. */
#define	STAR		'*'	/* matches any run of characters */
#define	QUESTION	'?'	/* matches exactly one character */
#define	LBRACKET	'['	/* opens a bracket set */
#define	RBRACKET	']'	/* closes a bracket set */
#define	NOT		'!'	/* negates a set when first after '[' */
#define	RANGE		'-'	/* separates the two ends of a range */
#define	QUOTE		'\\'	/* escape character under GLOB_QUOTE */

/* Characters with special meaning in a path name. */
#define	SEP		'/'	/* path component separator */
#define	DOT		'.'	/* leading dot must be matched literally */

/* Further named characters. */
#define	DOLLAR		'$'
#define	TILDE		'~'
#define	UNDERSCORE	'_'
59 
/*
 * Layout of a compiled pattern character: the low byte is the character
 * code; the bits above it are flags.
 */
#define	M_ASCII		0x00ff	/* the character code itself */
#define	M_PROTECT	0x4000	/* character was backslash-quoted */
#define	M_QUOTE		0x8000	/* character is a compiled meta character */
#define	M_MASK		0xffff	/* every bit match() looks at */

/* Strip the flag bits / mark a character as meta / test for meta. */
#define	CHAR(c)		((c) & M_ASCII)
#define	META(c)		((c) | M_QUOTE)
#define	ismeta(c)	(((c) & M_QUOTE) != 0)

/* Compiled forms of the pattern operators. */
#define	M_ALL		META('*')	/* any run of characters */
#define	M_ONE		META('?')	/* any single character */
#define	M_SET		META('[')	/* start of a bracket set */
#define	M_NOT		META('!')	/* the set is negated */
#define	M_RNG		META('-')	/* range inside a set */
#define	M_END		META(']')	/* end of a bracket set */
74 
75 typedef u_short Char;
76 
77 static int	 compare __P((const void *, const void *));
78 static void	 g_Ctoc __P((Char *, char *));
79 static int	 g_lstat __P((Char *, struct stat *, glob_t *));
80 static DIR	*g_opendir __P((Char *, glob_t *));
81 static Char	*g_strchr __P((Char *, int));
82 static int	 g_stat __P((Char *, struct stat *, glob_t *));
83 static int	 glob1 __P((Char *, glob_t *));
84 static int	 glob2 __P((Char *, Char *, Char *, glob_t *));
85 static int	 glob3 __P((Char *, Char *, Char *, Char *, glob_t *));
86 static int	 globextend __P((Char *, glob_t *));
87 static int	 match __P((Char *, Char *, Char *));
88 #ifdef DEBUG
89 static void	 qprintf __P((Char *));
90 #endif
91 
92 /*
93  * The main glob() routine: compiles the pattern (optionally processing
94  * quotes), calls glob1() to do the real pattern matching, and finally
95  * sorts the list (unless unsorted operation is requested).  Returns 0
96  * if things went well, nonzero if errors occurred.  It is not an error
97  * to find no matches.
98  */
99 glob(pattern, flags, errfunc, pglob)
100 	const char *pattern;
101 	int flags, (*errfunc) __P((char *, int));
102 	glob_t *pglob;
103 {
104 	const u_char *compilepat, *patnext;
105 	int c, err, oldpathc;
106 	Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1];
107 
108 	patnext = (u_char *) pattern;
109 	if (!(flags & GLOB_APPEND)) {
110 		pglob->gl_pathc = 0;
111 		pglob->gl_pathv = NULL;
112 		if (!(flags & GLOB_DOOFFS))
113 			pglob->gl_offs = 0;
114 	}
115 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
116 	pglob->gl_errfunc = errfunc;
117 	oldpathc = pglob->gl_pathc;
118 	pglob->gl_matchc = 0;
119 
120 	bufnext = patbuf;
121 	bufend = bufnext + MAXPATHLEN;
122 	compilebuf = bufnext;
123 	compilepat = patnext;
124 	if (flags & GLOB_QUOTE) {
125 		/* Protect the quoted characters. */
126 		while (bufnext < bufend && (c = *patnext++) != EOS)
127 			if (c == QUOTE) {
128 				if ((c = *patnext++) == EOS) {
129 					c = QUOTE;
130 					--patnext;
131 				}
132 				*bufnext++ = c | M_PROTECT;
133 			}
134 			else
135 				*bufnext++ = c;
136 	}
137 	else
138 	    while (bufnext < bufend && (c = *patnext++) != EOS)
139 		    *bufnext++ = c;
140 	*bufnext = EOS;
141 
142 	bufnext = patbuf;
143 	qpatnext = patbuf;
144 	/* We don't need to check for buffer overflow any more. */
145 	while ((c = *qpatnext++) != EOS) {
146 		switch (c) {
147 		case LBRACKET:
148 			c = *qpatnext;
149 			if (c == NOT)
150 				++qpatnext;
151 			if (*qpatnext == EOS ||
152 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
153 				*bufnext++ = LBRACKET;
154 				if (c == NOT)
155 					--qpatnext;
156 				break;
157 			}
158 			*bufnext++ = M_SET;
159 			if (c == NOT)
160 				*bufnext++ = M_NOT;
161 			c = *qpatnext++;
162 			do {
163 				*bufnext++ = CHAR(c);
164 				if (*qpatnext == RANGE &&
165 				    (c = qpatnext[1]) != RBRACKET) {
166 					*bufnext++ = M_RNG;
167 					*bufnext++ = CHAR(c);
168 					qpatnext += 2;
169 				}
170 			} while ((c = *qpatnext++) != RBRACKET);
171 			pglob->gl_flags |= GLOB_MAGCHAR;
172 			*bufnext++ = M_END;
173 			break;
174 		case QUESTION:
175 			pglob->gl_flags |= GLOB_MAGCHAR;
176 			*bufnext++ = M_ONE;
177 			break;
178 		case STAR:
179 			pglob->gl_flags |= GLOB_MAGCHAR;
180 			/* collapse adjacent stars to one,
181 			 * to avoid exponential behavior
182 			 */
183 			if (bufnext == patbuf || bufnext[-1] != M_ALL)
184 			    *bufnext++ = M_ALL;
185 			break;
186 		default:
187 			*bufnext++ = CHAR(c);
188 			break;
189 		}
190 	}
191 	*bufnext = EOS;
192 #ifdef DEBUG
193 	qprintf(patbuf);
194 #endif
195 
196 	if ((err = glob1(patbuf, pglob)) != 0)
197 		return(err);
198 
199 	/*
200 	 * If there was no match we are going to append the pattern
201 	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
202 	 * and the pattern did not contain any magic characters
203 	 * GLOB_NOMAGIC is there just for compatibility with csh.
204 	 */
205 	if (pglob->gl_pathc == oldpathc &&
206 	    ((flags & GLOB_NOCHECK) ||
207 	     ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) {
208 		if (!(flags & GLOB_QUOTE)) {
209 			Char *dp = compilebuf;
210 			const u_char *sp = compilepat;
211 			while (*dp++ = *sp++);
212 		}
213 		else {
214 			/*
215 			 * Copy pattern, interpreting quotes; this is slightly
216 			 * different than the interpretation of quotes above
217 			 * -- which should prevail?
218 			 */
219 			while (*compilepat != EOS) {
220 				if (*compilepat == QUOTE) {
221 					if (*++compilepat == EOS)
222 						--compilepat;
223 				}
224 				*compilebuf++ = (u_char)*compilepat++;
225 			}
226 			*compilebuf = EOS;
227 		}
228 		return(globextend(patbuf, pglob));
229 	} else if (!(flags & GLOB_NOSORT))
230 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
231 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
232 	return(0);
233 }
234 
/*
 * qsort() comparison callback: p and q each point at a `char *' slot of
 * the path vector; the two strings are ordered with strcmp().
 */
static int
compare(p, q)
	const void *p, *q;
{
	char **lhs, **rhs;

	lhs = (char **)p;
	rhs = (char **)q;
	return(strcmp(*lhs, *rhs));
}
241 
242 static
243 glob1(pattern, pglob)
244 	Char *pattern;
245 	glob_t *pglob;
246 {
247 	Char pathbuf[MAXPATHLEN+1];
248 
249 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
250 	if (*pattern == EOS)
251 		return(0);
252 	return(glob2(pathbuf, pathbuf, pattern, pglob));
253 }
254 
255 /*
256  * The functions glob2 and glob3 are mutually recursive; there is one level
257  * of recursion for each segment in the pattern that contains one or more
258  * meta characters.
259  */
260 static
261 glob2(pathbuf, pathend, pattern, pglob)
262 	Char *pathbuf, *pathend, *pattern;
263 	glob_t *pglob;
264 {
265 	struct stat sb;
266 	Char *p, *q;
267 	int anymeta;
268 
269 	/*
270 	 * Loop over pattern segments until end of pattern or until
271 	 * segment with meta character found.
272 	 */
273 	for (anymeta = 0;;) {
274 		if (*pattern == EOS) {		/* End of pattern? */
275 			*pathend = EOS;
276 			if (g_lstat(pathbuf, &sb, pglob))
277 				return(0);
278 
279 			if (((pglob->gl_flags & GLOB_MARK) &&
280 			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
281 			    || (S_ISLNK(sb.st_mode) &&
282 			    (g_stat(pathbuf, &sb, pglob) == 0) &&
283 			    S_ISDIR(sb.st_mode)))) {
284 				*pathend++ = SEP;
285 				*pathend = EOS;
286 			}
287 			++pglob->gl_matchc;
288 			return(globextend(pathbuf, pglob));
289 		}
290 
291 		/* Find end of next segment, copy tentatively to pathend. */
292 		q = pathend;
293 		p = pattern;
294 		while (*p != EOS && *p != SEP) {
295 			if (ismeta(*p))
296 				anymeta = 1;
297 			*q++ = *p++;
298 		}
299 
300 		if (!anymeta) {		/* No expansion, do next segment. */
301 			pathend = q;
302 			pattern = p;
303 			while (*pattern == SEP)
304 				*pathend++ = *pattern++;
305 		} else			/* Need expansion, recurse. */
306 			return(glob3(pathbuf, pathend, pattern, p, pglob));
307 	}
308 	/* NOTREACHED */
309 }
310 
311 static
312 glob3(pathbuf, pathend, pattern, restpattern, pglob)
313 	Char *pathbuf, *pathend, *pattern, *restpattern;
314 	glob_t *pglob;
315 {
316 	register struct dirent *dp;
317 	struct dirent *(*readdirfunc)();
318 	DIR *dirp;
319 	int len, err;
320 	char buf[MAXPATHLEN];
321 
322 	*pathend = EOS;
323 	errno = 0;
324 
325 	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
326 		/* TODO: don't call for ENOENT or ENOTDIR? */
327 		if (pglob->gl_errfunc) {
328 			g_Ctoc(pathbuf, buf);
329 			if (pglob->gl_errfunc(buf, errno) ||
330 			    pglob->gl_flags & GLOB_ERR)
331 				return (GLOB_ABEND);
332 		}
333 		return(0);
334 	}
335 
336 	err = 0;
337 
338 	/* Search directory for matching names. */
339 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
340 		readdirfunc = pglob->gl_readdir;
341 	else
342 		readdirfunc = readdir;
343 	while ((dp = (*readdirfunc)(dirp))) {
344 		register u_char *sc;
345 		register Char *dc;
346 
347 		/* Initial DOT must be matched literally. */
348 		if (dp->d_name[0] == DOT && *pattern != DOT)
349 			continue;
350 		for (sc = (u_char *) dp->d_name, dc = pathend;
351 		     *dc++ = *sc++;);
352 		if (!match(pathend, pattern, restpattern)) {
353 			*pathend = EOS;
354 			continue;
355 		}
356 		err = glob2(pathbuf, --dc, restpattern, pglob);
357 		if (err)
358 			break;
359 	}
360 
361 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
362 		(*pglob->gl_closedir)(dirp);
363 	else
364 		closedir(dirp);
365 	return(err);
366 }
367 
368 
369 /*
370  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
371  * add the new item, and update gl_pathc.
372  *
373  * This assumes the BSD realloc, which only copies the block when its size
374  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
375  * behavior.
376  *
377  * Return 0 if new item added, error code if memory couldn't be allocated.
378  *
379  * Invariant of the glob_t structure:
380  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
381  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
382  */
383 static int
384 globextend(path, pglob)
385 	Char *path;
386 	glob_t *pglob;
387 {
388 	register char **pathv;
389 	register int i;
390 	u_int newsize;
391 	char *copy;
392 	Char *p;
393 
394 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
395 	pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
396 	if (pathv == NULL)
397 		return(GLOB_NOSPACE);
398 
399 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
400 		/* first time around -- clear initial gl_offs items */
401 		pathv += pglob->gl_offs;
402 		for (i = pglob->gl_offs; --i >= 0; )
403 			*--pathv = NULL;
404 	}
405 	pglob->gl_pathv = pathv;
406 
407 	for (p = path; *p++;);
408 	if ((copy = malloc(p - path)) != NULL) {
409 		g_Ctoc(path, copy);
410 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
411 	}
412 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
413 	return(copy == NULL ? GLOB_NOSPACE : 0);
414 }
415 
416 
417 /*
418  * pattern matching function for filenames.  Each occurrence of the *
419  * pattern causes a recursion level.
420  */
421 static
422 match(name, pat, patend)
423 	register Char *name, *pat, *patend;
424 {
425 	int ok, negate_range;
426 	Char c, k;
427 
428 	while (pat < patend) {
429 		c = *pat++;
430 		switch (c & M_MASK) {
431 		case M_ALL:
432 			if (pat == patend)
433 				return(1);
434 			do
435 			    if (match(name, pat, patend))
436 				    return(1);
437 			while (*name++ != EOS);
438 			return(0);
439 		case M_ONE:
440 			if (*name++ == EOS)
441 				return(0);
442 			break;
443 		case M_SET:
444 			ok = 0;
445 			if ((k = *name++) == EOS)
446 				return(0);
447 			if (negate_range = ((*pat & M_MASK) == M_NOT))
448 				++pat;
449 			while (((c = *pat++) & M_MASK) != M_END)
450 				if ((*pat & M_MASK) == M_RNG) {
451 					if (c <= k && k <= pat[1])
452 						ok = 1;
453 					pat += 2;
454 				} else if (c == k)
455 					ok = 1;
456 			if (ok == negate_range)
457 				return(0);
458 			break;
459 		default:
460 			if (*name++ != c)
461 				return(0);
462 			break;
463 		}
464 	}
465 	return(*name == EOS);
466 }
467 
/*
 * Free allocated data belonging to a glob_t structure.
 *
 * Afterwards the structure is back in its empty state (gl_pathv NULL,
 * gl_pathc zero), so calling globfree() again, or glob() with
 * GLOB_APPEND, does not touch the freed memory.
 */
void
globfree(pglob)
	glob_t *pglob;
{
	register int i;
	register char **pp;

	if (pglob->gl_pathv != NULL) {
		/* The first gl_offs slots are reserved and own no string. */
		pp = pglob->gl_pathv + pglob->gl_offs;
		for (i = pglob->gl_pathc; i--; ++pp)
			free(*pp);
		free(pglob->gl_pathv);
		pglob->gl_pathv = NULL;
	}
	pglob->gl_pathc = 0;
}
484 
485 static DIR *
486 g_opendir(str, pglob)
487 	register Char *str;
488 	glob_t *pglob;
489 {
490 	char buf[MAXPATHLEN];
491 	char *dirname;
492 
493 	if (!*str)
494 		strcpy(buf, ".");
495 	else
496 		g_Ctoc(str, buf);
497 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
498 		return((*pglob->gl_opendir)(buf));
499 	return(opendir(buf));
500 }
501 
502 static int
503 g_lstat(fn, sb, pglob)
504 	register Char *fn;
505 	struct stat *sb;
506 	glob_t *pglob;
507 {
508 	char buf[MAXPATHLEN];
509 
510 	g_Ctoc(fn, buf);
511 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
512 		return((*pglob->gl_lstat)(buf, sb));
513 	return(lstat(buf, sb));
514 }
515 
516 static int
517 g_stat(fn, sb, pglob)
518 	register Char *fn;
519 	struct stat *sb;
520 	glob_t *pglob;
521 {
522 	char buf[MAXPATHLEN];
523 
524 	g_Ctoc(fn, buf);
525 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
526 		return((*pglob->gl_stat)(buf, sb));
527 	return(stat(buf, sb));
528 }
529 
530 static Char *
531 g_strchr(str, ch)
532 	Char *str;
533 	int ch;
534 {
535 	do {
536 		if (*str == ch)
537 			return (str);
538 	} while (*str++);
539 	return (NULL);
540 }
541 
542 static void
543 g_Ctoc(str, buf)
544 	register Char *str;
545 	char *buf;
546 {
547 	register char *dc;
548 
549 	for (dc = buf; *dc++ = *str++;);
550 }
551 
#ifdef DEBUG
/*
 * Debugging aid: dump a compiled pattern on stdout as three lines -- the
 * plain characters, a '"' under every protected (quoted) character, and
 * a '_' under every meta character.
 */
static void
qprintf(s)
	register Char *s;
{
	register Char *cur;

	/* Line 1: the characters with their flag bits stripped. */
	cur = s;
	while (*cur) {
		(void)printf("%c", CHAR(*cur));
		cur++;
	}
	(void)printf("\n");

	/* Line 2: marks for M_PROTECT. */
	cur = s;
	while (*cur) {
		if (*cur & M_PROTECT)
			(void)printf("%c", '"');
		else
			(void)printf("%c", ' ');
		cur++;
	}
	(void)printf("\n");

	/* Line 3: marks for meta characters. */
	cur = s;
	while (*cur) {
		if (ismeta(*cur))
			(void)printf("%c", '_');
		else
			(void)printf("%c", ' ');
		cur++;
	}
	(void)printf("\n");
}
#endif
570