xref: /netbsd-src/lib/libc/gen/glob.c (revision ce63d6c20fc4ec8ddc95c84bb229e3c4ecf82b69)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #if defined(LIBC_SCCS) && !defined(lint)
38 static char sccsid[] = "@(#)glob.c	5.12 (Berkeley) 6/24/91";
39 #endif /* LIBC_SCCS and not lint */
40 
41 /*
42  * glob(3) -- a superset of the one defined in POSIX 1003.2.
43  *
44  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
45  *
46  * Optional extra services, controlled by flags not defined by POSIX:
47  *
48  * GLOB_QUOTE:
49  *	Escaping convention: \ inhibits any special meaning the following
50  *	character might have (except \ at end of string is retained).
51  * GLOB_MAGCHAR:
52  *	Set in gl_flags if pattern contained a globbing character.
53  * gl_matchc:
54  *	Number of matches in the current invocation of glob.
55  */
56 
57 #include <sys/cdefs.h>
58 #include <sys/param.h>
59 #include <sys/stat.h>
60 #include <dirent.h>
61 #include <glob.h>
62 #include <ctype.h>
63 #include <errno.h>
64 #include <string.h>
65 #include <stdio.h>
66 #include <stdlib.h>
67 
68 #define	DOLLAR		'$'
69 #define	DOT		'.'
70 #define	EOS		'\0'
71 #define	LBRACKET	'['
72 #define	NOT		'!'
73 #define	QUESTION	'?'
74 #define	QUOTE		'\\'
75 #define	RANGE		'-'
76 #define	RBRACKET	']'
77 #define	SEP		'/'
78 #define	STAR		'*'
79 #define	TILDE		'~'
80 #define	UNDERSCORE	'_'
81 
82 #define	M_QUOTE		0x8000
83 #define	M_PROTECT	0x4000
84 #define	M_MASK		0xffff
85 #define	M_ASCII		0x00ff
86 
87 #define	CHAR(c)		((c)&M_ASCII)
88 #define	META(c)		((c)|M_QUOTE)
89 #define	M_ALL		META('*')
90 #define	M_END		META(']')
91 #define	M_NOT		META('!')
92 #define	M_ONE		META('?')
93 #define	M_RNG		META('-')
94 #define	M_SET		META('[')
95 #define	ismeta(c)	(((c)&M_QUOTE) != 0)
96 
97 typedef u_short Char;
98 
99 static int	 compare __P((const void *, const void *));
100 static void	 g_Ctoc __P((Char *, char *));
101 static int	 g_lstat __P((Char *, struct stat *));
102 static DIR	*g_opendir __P((Char *));
103 static Char	*g_strchr __P((Char *, int));
104 static int	 g_stat __P((Char *, struct stat *));
105 static int	 glob1 __P((Char *, glob_t *));
106 static int	 glob2 __P((Char *, Char *, Char *, glob_t *));
107 static int	 glob3 __P((Char *, Char *, Char *, Char *, glob_t *));
108 static int	 globextend __P((Char *, glob_t *));
109 static int	 match __P((Char *, Char *, Char *));
110 #ifdef DEBUG
111 static void	 qprintf __P((Char *));
112 #endif
113 
114 /*
115  * The main glob() routine: compiles the pattern (optionally processing
116  * quotes), calls glob1() to do the real pattern matching, and finally
117  * sorts the list (unless unsorted operation is requested).  Returns 0
118  * if things went well, nonzero if errors occurred.  It is not an error
119  * to find no matches.
120  */
121 glob(pattern, flags, errfunc, pglob)
122 	const char *pattern;
123 	int flags, (*errfunc) __P((char *, int));
124 	glob_t *pglob;
125 {
126 	const u_char *compilepat, *patnext;
127 	int c, err, oldpathc;
128 	Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1];
129 
130 	patnext = (u_char *) pattern;
131 	if (!(flags & GLOB_APPEND)) {
132 		pglob->gl_pathc = 0;
133 		pglob->gl_pathv = NULL;
134 		if (!(flags & GLOB_DOOFFS))
135 			pglob->gl_offs = 0;
136 	}
137 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
138 	pglob->gl_errfunc = errfunc;
139 	oldpathc = pglob->gl_pathc;
140 	pglob->gl_matchc = 0;
141 
142 	bufnext = patbuf;
143 	bufend = bufnext + MAXPATHLEN;
144 	compilebuf = bufnext;
145 	compilepat = patnext;
146 	if (flags & GLOB_QUOTE) {
147 		/* Protect the quoted characters. */
148 		while (bufnext < bufend && (c = *patnext++) != EOS)
149 			if (c == QUOTE) {
150 				if ((c = *patnext++) == EOS) {
151 					c = QUOTE;
152 					--patnext;
153 				}
154 				*bufnext++ = c | M_PROTECT;
155 			}
156 			else
157 				*bufnext++ = c;
158 	}
159 	else
160 	    while (bufnext < bufend && (c = *patnext++) != EOS)
161 		    *bufnext++ = c;
162 	*bufnext = EOS;
163 
164 	bufnext = patbuf;
165 	qpatnext = patbuf;
166 	/* We don't need to check for buffer overflow any more. */
167 	while ((c = *qpatnext++) != EOS) {
168 		switch (c) {
169 		case LBRACKET:
170 			pglob->gl_flags |= GLOB_MAGCHAR;
171 			c = *qpatnext;
172 			if (c == NOT)
173 				++qpatnext;
174 			if (*qpatnext == EOS ||
175 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
176 				*bufnext++ = LBRACKET;
177 				if (c == NOT)
178 					--qpatnext;
179 				break;
180 			}
181 			*bufnext++ = M_SET;
182 			if (c == NOT)
183 				*bufnext++ = M_NOT;
184 			c = *qpatnext++;
185 			do {
186 				*bufnext++ = CHAR(c);
187 				if (*qpatnext == RANGE &&
188 				    (c = qpatnext[1]) != RBRACKET) {
189 					*bufnext++ = M_RNG;
190 					*bufnext++ = CHAR(c);
191 					qpatnext += 2;
192 				}
193 			} while ((c = *qpatnext++) != RBRACKET);
194 			*bufnext++ = M_END;
195 			break;
196 		case QUESTION:
197 			pglob->gl_flags |= GLOB_MAGCHAR;
198 			*bufnext++ = M_ONE;
199 			break;
200 		case STAR:
201 			pglob->gl_flags |= GLOB_MAGCHAR;
202 			*bufnext++ = M_ALL;
203 			break;
204 		default:
205 			*bufnext++ = CHAR(c);
206 			break;
207 		}
208 	}
209 	*bufnext = EOS;
210 #ifdef DEBUG
211 	qprintf(patbuf);
212 #endif
213 
214 	if ((err = glob1(patbuf, pglob)) != 0)
215 		return(err);
216 
217 	if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
218 		if (!(flags & GLOB_QUOTE)) {
219 			Char *dp = compilebuf;
220 			const u_char *sp = compilepat;
221 			while (*dp++ = *sp++);
222 		}
223 		else {
224 			/*
225 			 * Copy pattern, interpreting quotes; this is slightly
226 			 * different than the interpretation of quotes above
227 			 * -- which should prevail?
228 			 */
229 			while (*compilepat != EOS) {
230 				if (*compilepat == QUOTE) {
231 					if (*++compilepat == EOS)
232 						--compilepat;
233 				}
234 				*compilebuf++ = (u_char)*compilepat++;
235 			}
236 			*compilebuf = EOS;
237 		}
238 		return(globextend(patbuf, pglob));
239 	} else if (!(flags & GLOB_NOSORT))
240 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
241 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
242 	return(0);
243 }
244 
245 static int
246 compare(p, q)
247 	const void *p, *q;
248 {
249 	return(strcmp(*(char **)p, *(char **)q));
250 }
251 
252 static
253 glob1(pattern, pglob)
254 	Char *pattern;
255 	glob_t *pglob;
256 {
257 	Char pathbuf[MAXPATHLEN+1];
258 
259 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
260 	if (*pattern == EOS)
261 		return(0);
262 	return(glob2(pathbuf, pathbuf, pattern, pglob));
263 }
264 
265 /*
266  * The functions glob2 and glob3 are mutually recursive; there is one level
267  * of recursion for each segment in the pattern that contains one or more
268  * meta characters.
269  */
270 static
271 glob2(pathbuf, pathend, pattern, pglob)
272 	Char *pathbuf, *pathend, *pattern;
273 	glob_t *pglob;
274 {
275 	struct stat sb;
276 	Char *p, *q;
277 	int anymeta;
278 
279 	/*
280 	 * Loop over pattern segments until end of pattern or until
281 	 * segment with meta character found.
282 	 */
283 	for (anymeta = 0;;) {
284 		if (*pattern == EOS) {		/* End of pattern? */
285 			*pathend = EOS;
286 			if (g_stat(pathbuf, &sb))
287 				return(0);
288 
289 			if (((pglob->gl_flags & GLOB_MARK) &&
290 			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
291 			    || (S_ISLNK(sb.st_mode) &&
292 			    (g_stat(pathbuf, &sb) == 0) &&
293 			    S_ISDIR(sb.st_mode)))) {
294 				*pathend++ = SEP;
295 				*pathend = EOS;
296 			}
297 			++pglob->gl_matchc;
298 			return(globextend(pathbuf, pglob));
299 		}
300 
301 		/* Find end of next segment, copy tentatively to pathend. */
302 		q = pathend;
303 		p = pattern;
304 		while (*p != EOS && *p != SEP) {
305 			if (ismeta(*p))
306 				anymeta = 1;
307 			*q++ = *p++;
308 		}
309 
310 		if (!anymeta) {		/* No expansion, do next segment. */
311 			pathend = q;
312 			pattern = p;
313 			while (*pattern == SEP)
314 				*pathend++ = *pattern++;
315 		} else			/* Need expansion, recurse. */
316 			return(glob3(pathbuf, pathend, pattern, p, pglob));
317 	}
318 	/* NOTREACHED */
319 }
320 
321 static
322 glob3(pathbuf, pathend, pattern, restpattern, pglob)
323 	Char *pathbuf, *pathend, *pattern, *restpattern;
324 	glob_t *pglob;
325 {
326 	register struct dirent *dp;
327 	DIR *dirp;
328 	int len, err;
329 
330 	*pathend = EOS;
331 	errno = 0;
332 
333 	if (!(dirp = g_opendir(pathbuf)))
334 		/* TODO: don't call for ENOENT or ENOTDIR? */
335 		if (pglob->gl_errfunc &&
336 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
337 		    (pglob->gl_flags & GLOB_ERR))
338 			return(GLOB_ABEND);
339 		else
340 			return(0);
341 
342 	err = 0;
343 
344 	/* Search directory for matching names. */
345 	while ((dp = readdir(dirp))) {
346 		register u_char *sc;
347 		register Char *dc;
348 
349 		/* Initial DOT must be matched literally. */
350 		if (dp->d_name[0] == DOT && *pattern != DOT)
351 			continue;
352 		for (sc = (u_char *) dp->d_name, dc = pathend;
353 		     *dc++ = *sc++;);
354 		if (!match(pathend, pattern, restpattern)) {
355 			*pathend = EOS;
356 			continue;
357 		}
358 		err = glob2(pathbuf, --dc, restpattern, pglob);
359 		if (err)
360 			break;
361 	}
362 
363 	/* TODO: check error from readdir? */
364 	(void)closedir(dirp);
365 	return(err);
366 }
367 
368 
369 /*
370  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
371  * add the new item, and update gl_pathc.
372  *
373  * This assumes the BSD realloc, which only copies the block when its size
374  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
375  * behavior.
376  *
377  * Return 0 if new item added, error code if memory couldn't be allocated.
378  *
379  * Invariant of the glob_t structure:
380  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
381  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
382  */
383 static int
384 globextend(path, pglob)
385 	Char *path;
386 	glob_t *pglob;
387 {
388 	register char **pathv;
389 	register int i;
390 	u_int newsize;
391 	char *copy;
392 	Char *p;
393 
394 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
395 	pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
396 	if (pathv == NULL)
397 		return(GLOB_NOSPACE);
398 
399 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
400 		/* first time around -- clear initial gl_offs items */
401 		pathv += pglob->gl_offs;
402 		for (i = pglob->gl_offs; --i >= 0; )
403 			*--pathv = NULL;
404 	}
405 	pglob->gl_pathv = pathv;
406 
407 	for (p = path; *p++;);
408 	if ((copy = malloc(p - path)) != NULL) {
409 		g_Ctoc(path, copy);
410 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
411 	}
412 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
413 	return(copy == NULL ? GLOB_NOSPACE : 0);
414 }
415 
416 
417 /*
418  * pattern matching function for filenames.  Each occurrence of the *
419  * pattern causes a recursion level.
420  */
421 static
422 match(name, pat, patend)
423 	register Char *name, *pat, *patend;
424 {
425 	int ok, negate_range;
426 	Char c, k;
427 
428 	while (pat < patend) {
429 		c = *pat++;
430 		switch (c & M_MASK) {
431 		case M_ALL:
432 			if (pat == patend)
433 				return(1);
434 			for (; *name != EOS; ++name)
435 				if (match(name, pat, patend))
436 					return(1);
437 			return(0);
438 		case M_ONE:
439 			if (*name++ == EOS)
440 				return(0);
441 			break;
442 		case M_SET:
443 			ok = 0;
444 			k = *name++;
445 			if (negate_range = ((*pat & M_MASK) == M_NOT))
446 				++pat;
447 			while (((c = *pat++) & M_MASK) != M_END)
448 				if ((*pat & M_MASK) == M_RNG) {
449 					if (c <= k && k <= pat[1])
450 						ok = 1;
451 					pat += 2;
452 				} else if (c == k)
453 					ok = 1;
454 			if (ok == negate_range)
455 				return(0);
456 			break;
457 		default:
458 			if (*name++ != c)
459 				return(0);
460 			break;
461 		}
462 	}
463 	return(*name == EOS);
464 }
465 
466 /* Free allocated data belonging to a glob_t structure. */
467 void
468 globfree(pglob)
469 	glob_t *pglob;
470 {
471 	register int i;
472 	register char **pp;
473 
474 	if (pglob->gl_pathv != NULL) {
475 		pp = pglob->gl_pathv + pglob->gl_offs;
476 		for (i = pglob->gl_pathc; i--; ++pp)
477 			if (*pp)
478 				free(*pp);
479 		free(pglob->gl_pathv);
480 	}
481 }
482 
483 static DIR *
484 g_opendir(str)
485 	register Char *str;
486 {
487 	char buf[MAXPATHLEN];
488 
489 	if (!*str)
490 		return(opendir("."));
491 	g_Ctoc(str, buf);
492 	return(opendir(buf));
493 }
494 
495 static int
496 g_lstat(fn, sb)
497 	register Char *fn;
498 	struct stat *sb;
499 {
500 	char buf[MAXPATHLEN];
501 
502 	g_Ctoc(fn, buf);
503 	return(lstat(buf, sb));
504 }
505 
506 static int
507 g_stat(fn, sb)
508 	register Char *fn;
509 	struct stat *sb;
510 {
511 	char buf[MAXPATHLEN];
512 
513 	g_Ctoc(fn, buf);
514 	return(stat(buf, sb));
515 }
516 
517 static Char *
518 g_strchr(str, ch)
519 	Char *str;
520 	int ch;
521 {
522 	do {
523 		if (*str == ch)
524 			return (str);
525 	} while (*str++);
526 	return (NULL);
527 }
528 
529 static void
530 g_Ctoc(str, buf)
531 	register Char *str;
532 	char *buf;
533 {
534 	register char *dc;
535 
536 	for (dc = buf; *dc++ = *str++;);
537 }
538 
539 #ifdef DEBUG
540 static void
541 qprintf(s)
542 	register Char *s;
543 {
544 	register Char *p;
545 
546 	for (p = s; *p; p++)
547 		(void)printf("%c", *p & 0xff);
548 	(void)printf("\n");
549 	for (p = s; *p; p++)
550 		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
551 	(void)printf("\n");
552 	for (p = s; *p; p++)
553 		(void)printf("%c", *p & M_META ? '_' : ' ');
554 	(void)printf("\n");
555 }
556 #endif
557