xref: /csrg-svn/lib/libc/gen/glob.c (revision 50049)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)glob.c	5.9 (Berkeley) 06/16/91";
13 #endif /* LIBC_SCCS and not lint */
14 
15 /*
16  * Glob: the interface is a superset of the one defined in POSIX 1003.2,
17  * draft 9.
18  *
19  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
20  *
21  * Optional extra services, controlled by flags not defined by POSIX:
22  *
23  * GLOB_QUOTE:
24  *	Escaping convention: \ inhibits any special meaning the following
25  *	character might have (except \ at end of string is retained).
26  * GLOB_MAGCHAR:
27  *	Set in gl_flags if pattern contained a globbing character.
28  * gl_matchc:
29  *	Number of matches in the current invocation of glob.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/param.h>
34 #include <sys/stat.h>
35 #include <dirent.h>
36 #include <glob.h>
37 #include <ctype.h>
38 #include <errno.h>
39 #include <string.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 
43 #define	DOLLAR		'$'
44 #define	DOT		'.'
45 #define	EOS		'\0'
46 #define	LBRACKET	'['
47 #define	NOT		'!'
48 #define	QUESTION	'?'
49 #define	QUOTE		'\\'
50 #define	RANGE		'-'
51 #define	RBRACKET	']'
52 #define	SEP		'/'
53 #define	STAR		'*'
54 #define	TILDE		'~'
55 #define	UNDERSCORE	'_'
56 
57 #define	M_QUOTE		0x8000
58 #define	M_PROTECT	0x4000
59 #define	M_MASK		0xffff
60 
61 #define	META(c)		((c)|M_QUOTE)
62 #define	M_ALL		META('*')
63 #define	M_END		META(']')
64 #define	M_NOT		META('!')
65 #define	M_ONE		META('?')
66 #define	M_RNG		META('-')
67 #define	M_SET		META('[')
68 #define	ismeta(c)	(((c)&M_QUOTE) != 0)
69 
70 typedef u_short shortchar_t;
71 
72 static int glob1 __P((shortchar_t *, glob_t *));
73 static int glob2 __P((shortchar_t *, shortchar_t *, shortchar_t *, glob_t *));
74 static int glob3 __P((shortchar_t *,
75 	shortchar_t *, shortchar_t *, shortchar_t *, glob_t *));
76 static int globextend __P((shortchar_t *, glob_t *));
77 static int match __P((shortchar_t *, shortchar_t *, shortchar_t *));
78 static DIR *Opendir __P((shortchar_t *));
79 static int Lstat __P((shortchar_t *, struct stat *));
80 static int Stat __P((shortchar_t *, struct stat *));
81 static shortchar_t *Strchr __P((shortchar_t *, int));
82 #ifdef DEBUG
83 static void qprintf __P((shortchar_t *));
84 #endif
85 
86 static DIR *
87 Opendir(str)
88 	register shortchar_t *str;
89 {
90 	register char *dc;
91 	char buf[MAXPATHLEN];
92 
93 	if (!*str)
94 		return(opendir("."));
95 	for (dc = buf; *dc++ = *str++;);
96 	return(opendir(buf));
97 }
98 
99 static int
100 Lstat(fn, sb)
101 	register shortchar_t *fn;
102 	struct stat *sb;
103 {
104 	register char *dc;
105 	char buf[MAXPATHLEN];
106 
107 	for (dc = buf; *dc++ = *fn++;);
108 	return(lstat(buf, sb));
109 }
110 
111 static int
112 Stat(fn, sb)
113 	register shortchar_t *fn;
114 	struct stat *sb;
115 {
116 	register char *dc;
117 	char buf[MAXPATHLEN];
118 
119 	for (dc = buf; *dc++ = *fn++;);
120 	return(stat(buf, sb));
121 }
122 
123 static shortchar_t *
124 Strchr(str, ch)
125 	shortchar_t *str;
126 	int ch;
127 {
128 	do {
129 		if (*str == ch)
130 			return (str);
131 	} while (*str++);
132 	return (NULL);
133 }
134 
135 #ifdef DEBUG
136 static void
137 qprintf(s)
138 shortchar_t *s;
139 {
140 	shortchar_t *p;
141 
142 	for (p = s; *p; p++)
143 		printf("%c", *p & 0xff);
144 	printf("\n");
145 	for (p = s; *p; p++)
146 		printf("%c", *p & M_PROTECT ? '"' : ' ');
147 	printf("\n");
148 	for (p = s; *p; p++)
149 		printf("%c", *p & M_META ? '_' : ' ');
150 	printf("\n");
151 }
152 #endif
153 
154 static int
155 compare(p, q)
156 	void **p, **q;
157 {
158 	return(strcmp(*(char **)p, *(char **)q));
159 }
160 
161 /*
162  * The main glob() routine: compiles the pattern (optionally processing
163  * quotes), calls glob1() to do the real pattern matching, and finally
164  * sorts the list (unless unsorted operation is requested).  Returns 0
165  * if things went well, nonzero if errors occurred.  It is not an error
166  * to find no matches.
167  */
168 glob(pattern, flags, errfunc, pglob)
169 	const char *pattern;
170 	int flags;
171 	int (*errfunc) __P((char *, int));
172 	glob_t *pglob;
173 {
174 	int err, oldpathc;
175 	shortchar_t *bufnext, *bufend, *compilebuf;
176 	const char *compilepat, *patnext;
177 	char c;
178 	shortchar_t patbuf[MAXPATHLEN+1], *qpatnext;
179 
180 	patnext = pattern;
181 	if (!(flags & GLOB_APPEND)) {
182 		pglob->gl_pathc = 0;
183 		pglob->gl_pathv = NULL;
184 		if (!(flags & GLOB_DOOFFS))
185 			pglob->gl_offs = 0;
186 	}
187 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
188 	pglob->gl_errfunc = errfunc;
189 	oldpathc = pglob->gl_pathc;
190 	pglob->gl_matchc = 0;
191 
192 	bufnext = patbuf;
193 	bufend = bufnext + MAXPATHLEN;
194 	if (flags & GLOB_QUOTE) {
195 		/* Protect the quoted characters */
196 		while (bufnext < bufend && (c = *patnext++) != EOS)
197 			if (c == QUOTE) {
198 				if ((c = *patnext++) == EOS) {
199 					c = QUOTE;
200 					--patnext;
201 				}
202 				*bufnext++ = c | M_PROTECT;
203 			}
204 			else
205 				*bufnext++ = c;
206 	}
207 	else
208 	    while (bufnext < bufend && (c = *patnext++) != EOS)
209 		    *bufnext++ = c;
210 	*bufnext = EOS;
211 
212 	bufnext = patbuf;
213 	qpatnext = patbuf;
214 	compilebuf = bufnext;
215 	compilepat = patnext;
216 	/* we don't need to check for buffer overflow any more */
217 	while ((c = *qpatnext++) != EOS) {
218 		switch (c) {
219 		case LBRACKET:
220 			pglob->gl_flags |= GLOB_MAGCHAR;
221 			c = *qpatnext;
222 			if (c == NOT)
223 				++qpatnext;
224 			if (*qpatnext == EOS ||
225 			    Strchr(qpatnext+1, RBRACKET) == NULL) {
226 				*bufnext++ = LBRACKET;
227 				if (c == NOT)
228 					--qpatnext;
229 				break;
230 			}
231 			*bufnext++ = M_SET;
232 			if (c == NOT)
233 				*bufnext++ = M_NOT;
234 			c = *qpatnext++;
235 			do {
236 				*bufnext++ = c;
237 				if (*qpatnext == RANGE &&
238 				    (c = qpatnext[1]) != RBRACKET) {
239 					*bufnext++ = M_RNG;
240 					*bufnext++ = c;
241 					qpatnext += 2;
242 				}
243 			} while ((c = *qpatnext++) != RBRACKET);
244 			*bufnext++ = M_END;
245 			break;
246 		case QUESTION:
247 			pglob->gl_flags |= GLOB_MAGCHAR;
248 			*bufnext++ = M_ONE;
249 			break;
250 		case STAR:
251 			pglob->gl_flags |= GLOB_MAGCHAR;
252 			*bufnext++ = M_ALL;
253 			break;
254 		default:
255 			*bufnext++ = c;
256 			break;
257 		}
258 	}
259 	*bufnext = EOS;
260 #ifdef DEBUG
261 	qprintf(patbuf);
262 #endif
263 
264 	if ((err = glob1(patbuf, pglob)) != 0)
265 		return(err);
266 
267 	if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
268 		if (!(flags & GLOB_QUOTE)) {
269 			shortchar_t *dp = compilebuf;
270 			const char *sp = compilepat;
271 			while (*dp++ = (u_char)*sp++);
272 		}
273 		else {
274 			/*
275 			 * copy pattern, interpreting quotes; this is slightly
276 			 * different than the interpretation of quotes above
277 			 * -- which should prevail?
278 			 */
279 			while (*compilepat != EOS) {
280 				if (*compilepat == QUOTE) {
281 					if (*++compilepat == EOS)
282 						--compilepat;
283 				}
284 				*compilebuf++ = (u_char)*compilepat++;
285 			}
286 			*compilebuf = EOS;
287 		}
288 		return(globextend(patbuf, pglob));
289 	} else if (!(flags & GLOB_NOSORT))
290 		qsort((char*)(pglob->gl_pathv + pglob->gl_offs + oldpathc),
291 		    pglob->gl_pathc - oldpathc, sizeof(char*), compare);
292 	return(0);
293 }
294 
295 static
296 glob1(pattern, pglob)
297 	shortchar_t *pattern;
298 	glob_t *pglob;
299 {
300 	shortchar_t pathbuf[MAXPATHLEN+1];
301 
302 	/*
303 	 * A null pathname is invalid -- POSIX 1003.1 sect. 2.4.
304 	 */
305 	if (*pattern == EOS)
306 		return(0);
307 	return(glob2(pathbuf, pathbuf, pattern, pglob));
308 }
309 
310 /*
311  * functions glob2 and glob3 are mutually recursive; there is one level
312  * of recursion for each segment in the pattern that contains one or
313  * more meta characters.
314  */
315 static
316 glob2(pathbuf, pathend, pattern, pglob)
317 	shortchar_t *pathbuf, *pathend, *pattern;
318 	glob_t *pglob;
319 {
320 	struct stat sbuf;
321 	shortchar_t *p, *q;
322 	int anymeta;
323 
324 	/*
325 	 * loop over pattern segments until end of pattern or until
326 	 * segment with meta character found.
327 	 */
328 	for (anymeta = 0;;) {
329 		if (*pattern == EOS) {		/* end of pattern? */
330 			*pathend = EOS;
331 			if (Lstat(pathbuf, &sbuf))
332 				return(0);
333 
334 			if (((pglob->gl_flags & GLOB_MARK) &&
335 			    pathend[-1] != SEP) && (S_ISDIR(sbuf.st_mode)
336 			    || (S_ISLNK(sbuf.st_mode) &&
337 			    (Stat(pathbuf, &sbuf) == 0) &&
338 			    S_ISDIR(sbuf.st_mode)))) {
339 				*pathend++ = SEP;
340 				*pathend = EOS;
341 			}
342 			++pglob->gl_matchc;
343 			return(globextend(pathbuf, pglob));
344 		}
345 
346 		/* find end of next segment, copy tentatively to pathend */
347 		q = pathend;
348 		p = pattern;
349 		while (*p != EOS && *p != SEP) {
350 			if (ismeta(*p))
351 				anymeta = 1;
352 			*q++ = *p++;
353 		}
354 
355 		if (!anymeta) {		/* no expansion, do next segment */
356 			pathend = q;
357 			pattern = p;
358 			while (*pattern == SEP)
359 				*pathend++ = *pattern++;
360 		} else			/* need expansion, recurse */
361 			return(glob3(pathbuf, pathend, pattern, p, pglob));
362 	}
363 	/* NOTREACHED */
364 }
365 
366 static
367 glob3(pathbuf, pathend, pattern, restpattern, pglob)
368 	shortchar_t *pathbuf, *pathend, *pattern, *restpattern;
369 	glob_t *pglob;
370 {
371 	extern int errno;
372 	DIR *dirp;
373 	struct dirent *dp;
374 	int len, err;
375 
376 	*pathend = EOS;
377 	errno = 0;
378 
379 	if (!(dirp = Opendir(pathbuf)))
380 		/* todo: don't call for ENOENT or ENOTDIR? */
381 		if (pglob->gl_errfunc &&
382 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
383 		    (pglob->gl_flags & GLOB_ERR))
384 			return(GLOB_ABEND);
385 		else
386 			return(0);
387 
388 	err = 0;
389 
390 	/* search directory for matching names */
391 	while ((dp = readdir(dirp))) {
392 		register char *sc;
393 		register shortchar_t *dc;
394 		/* initial DOT must be matched literally */
395 		if (dp->d_name[0] == DOT && *pattern != DOT)
396 			continue;
397 		for (sc = dp->d_name, dc = pathend;
398 		     *dc++ = (u_char)*sc++;);
399 		if (!match(pathend, pattern, restpattern)) {
400 			*pathend = EOS;
401 			continue;
402 		}
403 		err = glob2(pathbuf, --dc, restpattern, pglob);
404 		if (err)
405 			break;
406 	}
407 	/* todo: check error from readdir? */
408 	(void)closedir(dirp);
409 	return(err);
410 }
411 
412 
413 /*
414  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
415  * add the new item, and update gl_pathc.
416  *
417  * This assumes the BSD realloc, which only copies the block when its size
418  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
419  * behavior.
420  *
421  * Return 0 if new item added, error code if memory couldn't be allocated.
422  *
423  * Invariant of the glob_t structure:
424  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
425  *	 gl_pathv points to (gl_offs + gl_pathc + 1) items.
426  */
427 static int
428 globextend(path, pglob)
429 	shortchar_t *path;
430 	glob_t *pglob;
431 {
432 	register char **pathv;
433 	register int i;
434 	u_int newsize;
435 	char *copy;
436 	shortchar_t *p;
437 
438 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
439 	pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
440 	if (pathv == NULL)
441 		return(GLOB_NOSPACE);
442 
443 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
444 		/* first time around -- clear initial gl_offs items */
445 		pathv += pglob->gl_offs;
446 		for (i = pglob->gl_offs; --i >= 0; )
447 			*--pathv = NULL;
448 	}
449 	pglob->gl_pathv = pathv;
450 
451 	for (p = path; *p++;);
452 	if ((copy = malloc(p - path)) != NULL) {
453 		register char *dc;
454 		register shortchar_t *sc;
455 
456 		for (dc = copy, sc = path; *dc++ = *sc++;);
457 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
458 	}
459 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
460 	return((copy == NULL) ? GLOB_NOSPACE : 0);
461 }
462 
463 
464 /*
465  * pattern matching function for filenames.  Each occurrence of the *
466  * pattern causes a recursion level.
467  */
468 static
469 match(name, pat, patend)
470 	register shortchar_t *name, *pat, *patend;
471 {
472 	int ok, negate_range;
473 	shortchar_t c, k;
474 
475 	while (pat < patend) {
476 		c = *pat++;
477 		switch (c & M_MASK) {
478 		case M_ALL:
479 			if (pat == patend)
480 				return(1);
481 			for (; *name != EOS; ++name) {
482 				if (match(name, pat, patend))
483 					return(1);
484 			}
485 			return(0);
486 		case M_ONE:
487 			if (*name++ == EOS)
488 				return(0);
489 			break;
490 		case M_SET:
491 			ok = 0;
492 			k = *name++;
493 			if (negate_range = ((*pat & M_MASK) == M_NOT))
494 				++pat;
495 			while (((c = *pat++) & M_MASK) != M_END) {
496 				if ((*pat & M_MASK) == M_RNG) {
497 					if (c <= k && k <= pat[1])
498 						ok = 1;
499 					pat += 2;
500 				}
501 				else if (c == k)
502 					ok = 1;
503 			}
504 			if (ok == negate_range)
505 				return(0);
506 			break;
507 		default:
508 			if (*name++ != c)
509 				return(0);
510 			break;
511 		}
512 	}
513 	return(*name == EOS);
514 }
515 
516 /* free allocated data belonging to a glob_t structure */
517 void
518 globfree(pglob)
519 	glob_t *pglob;
520 {
521 	register int i;
522 	register char **pp;
523 
524 	if (pglob->gl_pathv != NULL) {
525 		pp = pglob->gl_pathv + pglob->gl_offs;
526 		for (i = pglob->gl_pathc; i--; ++pp)
527 			if (*pp)
528 				free(pp);
529 		free(pglob->gl_pathv);
530 	}
531 }
532