xref: /netbsd-src/lib/libc/gen/glob.c (revision e7ae1531d5bf1dc29cd4b5bd3da98e9701c3aeac)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #if defined(LIBC_SCCS) && !defined(lint)
38 /*static char sccsid[] = "from: @(#)glob.c	5.18 (Berkeley) 12/4/92";*/
39 static char rcsid[] = "$Id: glob.c,v 1.2 1993/07/30 07:57:52 mycroft Exp $";
40 #endif /* LIBC_SCCS and not lint */
41 
42 /*
43  * glob(3) -- a superset of the one defined in POSIX 1003.2.
44  *
45  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
46  *
47  * Optional extra services, controlled by flags not defined by POSIX:
48  *
49  * GLOB_QUOTE:
50  *	Escaping convention: \ inhibits any special meaning the following
51  *	character might have (except \ at end of string is retained).
52  * GLOB_MAGCHAR:
53  *	Set in gl_flags if pattern contained a globbing character.
54  * GLOB_NOMAGIC:
55  *	Same as GLOB_NOCHECK, but it will only append pattern if it did
56  *	not contain any magic characters.  [Used in csh style globbing]
57  * GLOB_ALTDIRFUNC:
58  *	Use alternately specified directory access functions.
59  * gl_matchc:
60  *	Number of matches in the current invocation of glob.
61  */
62 
63 #include <sys/param.h>
64 #include <sys/stat.h>
65 #include <dirent.h>
66 #include <glob.h>
67 #include <ctype.h>
68 #include <errno.h>
69 #include <string.h>
70 #include <stdio.h>
71 #include <stdlib.h>
72 
73 #define	DOLLAR		'$'
74 #define	DOT		'.'
75 #define	EOS		'\0'
76 #define	LBRACKET	'['
77 #define	NOT		'!'
78 #define	QUESTION	'?'
79 #define	QUOTE		'\\'
80 #define	RANGE		'-'
81 #define	RBRACKET	']'
82 #define	SEP		'/'
83 #define	STAR		'*'
84 #define	TILDE		'~'
85 #define	UNDERSCORE	'_'
86 
87 #define	M_QUOTE		0x8000
88 #define	M_PROTECT	0x4000
89 #define	M_MASK		0xffff
90 #define	M_ASCII		0x00ff
91 
92 #define	CHAR(c)		((c)&M_ASCII)
93 #define	META(c)		((c)|M_QUOTE)
94 #define	M_ALL		META('*')
95 #define	M_END		META(']')
96 #define	M_NOT		META('!')
97 #define	M_ONE		META('?')
98 #define	M_RNG		META('-')
99 #define	M_SET		META('[')
100 #define	ismeta(c)	(((c)&M_QUOTE) != 0)
101 
102 typedef u_short Char;
103 
104 static int	 compare __P((const void *, const void *));
105 static void	 g_Ctoc __P((Char *, char *));
106 static int	 g_lstat __P((Char *, struct stat *, glob_t *));
107 static DIR	*g_opendir __P((Char *, glob_t *));
108 static Char	*g_strchr __P((Char *, int));
109 static int	 g_stat __P((Char *, struct stat *, glob_t *));
110 static int	 glob1 __P((Char *, glob_t *));
111 static int	 glob2 __P((Char *, Char *, Char *, glob_t *));
112 static int	 glob3 __P((Char *, Char *, Char *, Char *, glob_t *));
113 static int	 globextend __P((Char *, glob_t *));
114 static int	 match __P((Char *, Char *, Char *));
115 #ifdef DEBUG
116 static void	 qprintf __P((Char *));
117 #endif
118 
119 /*
120  * The main glob() routine: compiles the pattern (optionally processing
121  * quotes), calls glob1() to do the real pattern matching, and finally
122  * sorts the list (unless unsorted operation is requested).  Returns 0
123  * if things went well, nonzero if errors occurred.  It is not an error
124  * to find no matches.
125  */
126 glob(pattern, flags, errfunc, pglob)
127 	const char *pattern;
128 	int flags, (*errfunc) __P((char *, int));
129 	glob_t *pglob;
130 {
131 	const u_char *compilepat, *patnext;
132 	int c, err, oldpathc;
133 	Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1];
134 
135 	patnext = (u_char *) pattern;
136 	if (!(flags & GLOB_APPEND)) {
137 		pglob->gl_pathc = 0;
138 		pglob->gl_pathv = NULL;
139 		if (!(flags & GLOB_DOOFFS))
140 			pglob->gl_offs = 0;
141 	}
142 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
143 	pglob->gl_errfunc = errfunc;
144 	oldpathc = pglob->gl_pathc;
145 	pglob->gl_matchc = 0;
146 
147 	bufnext = patbuf;
148 	bufend = bufnext + MAXPATHLEN;
149 	compilebuf = bufnext;
150 	compilepat = patnext;
151 	if (flags & GLOB_QUOTE) {
152 		/* Protect the quoted characters. */
153 		while (bufnext < bufend && (c = *patnext++) != EOS)
154 			if (c == QUOTE) {
155 				if ((c = *patnext++) == EOS) {
156 					c = QUOTE;
157 					--patnext;
158 				}
159 				*bufnext++ = c | M_PROTECT;
160 			}
161 			else
162 				*bufnext++ = c;
163 	}
164 	else
165 	    while (bufnext < bufend && (c = *patnext++) != EOS)
166 		    *bufnext++ = c;
167 	*bufnext = EOS;
168 
169 	bufnext = patbuf;
170 	qpatnext = patbuf;
171 	/* We don't need to check for buffer overflow any more. */
172 	while ((c = *qpatnext++) != EOS) {
173 		switch (c) {
174 		case LBRACKET:
175 			c = *qpatnext;
176 			if (c == NOT)
177 				++qpatnext;
178 			if (*qpatnext == EOS ||
179 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
180 				*bufnext++ = LBRACKET;
181 				if (c == NOT)
182 					--qpatnext;
183 				break;
184 			}
185 			*bufnext++ = M_SET;
186 			if (c == NOT)
187 				*bufnext++ = M_NOT;
188 			c = *qpatnext++;
189 			do {
190 				*bufnext++ = CHAR(c);
191 				if (*qpatnext == RANGE &&
192 				    (c = qpatnext[1]) != RBRACKET) {
193 					*bufnext++ = M_RNG;
194 					*bufnext++ = CHAR(c);
195 					qpatnext += 2;
196 				}
197 			} while ((c = *qpatnext++) != RBRACKET);
198 			pglob->gl_flags |= GLOB_MAGCHAR;
199 			*bufnext++ = M_END;
200 			break;
201 		case QUESTION:
202 			pglob->gl_flags |= GLOB_MAGCHAR;
203 			*bufnext++ = M_ONE;
204 			break;
205 		case STAR:
206 			pglob->gl_flags |= GLOB_MAGCHAR;
207 			/* collapse adjacent stars to one,
208 			 * to avoid exponential behavior
209 			 */
210 			if (bufnext == patbuf || bufnext[-1] != M_ALL)
211 			    *bufnext++ = M_ALL;
212 			break;
213 		default:
214 			*bufnext++ = CHAR(c);
215 			break;
216 		}
217 	}
218 	*bufnext = EOS;
219 #ifdef DEBUG
220 	qprintf(patbuf);
221 #endif
222 
223 	if ((err = glob1(patbuf, pglob)) != 0)
224 		return(err);
225 
226 	/*
227 	 * If there was no match we are going to append the pattern
228 	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
229 	 * and the pattern did not contain any magic characters
230 	 * GLOB_NOMAGIC is there just for compatibility with csh.
231 	 */
232 	if (pglob->gl_pathc == oldpathc &&
233 	    ((flags & GLOB_NOCHECK) ||
234 	     ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) {
235 		if (!(flags & GLOB_QUOTE)) {
236 			Char *dp = compilebuf;
237 			const u_char *sp = compilepat;
238 			while (*dp++ = *sp++);
239 		}
240 		else {
241 			/*
242 			 * Copy pattern, interpreting quotes; this is slightly
243 			 * different than the interpretation of quotes above
244 			 * -- which should prevail?
245 			 */
246 			while (*compilepat != EOS) {
247 				if (*compilepat == QUOTE) {
248 					if (*++compilepat == EOS)
249 						--compilepat;
250 				}
251 				*compilebuf++ = (u_char)*compilepat++;
252 			}
253 			*compilebuf = EOS;
254 		}
255 		return(globextend(patbuf, pglob));
256 	} else if (!(flags & GLOB_NOSORT))
257 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
258 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
259 	return(0);
260 }
261 
262 static int
263 compare(p, q)
264 	const void *p, *q;
265 {
266 	return(strcmp(*(char **)p, *(char **)q));
267 }
268 
269 static
270 glob1(pattern, pglob)
271 	Char *pattern;
272 	glob_t *pglob;
273 {
274 	Char pathbuf[MAXPATHLEN+1];
275 
276 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
277 	if (*pattern == EOS)
278 		return(0);
279 	return(glob2(pathbuf, pathbuf, pattern, pglob));
280 }
281 
282 /*
283  * The functions glob2 and glob3 are mutually recursive; there is one level
284  * of recursion for each segment in the pattern that contains one or more
285  * meta characters.
286  */
287 static
288 glob2(pathbuf, pathend, pattern, pglob)
289 	Char *pathbuf, *pathend, *pattern;
290 	glob_t *pglob;
291 {
292 	struct stat sb;
293 	Char *p, *q;
294 	int anymeta;
295 
296 	/*
297 	 * Loop over pattern segments until end of pattern or until
298 	 * segment with meta character found.
299 	 */
300 	for (anymeta = 0;;) {
301 		if (*pattern == EOS) {		/* End of pattern? */
302 			*pathend = EOS;
303 			if (g_lstat(pathbuf, &sb, pglob))
304 				return(0);
305 
306 			if (((pglob->gl_flags & GLOB_MARK) &&
307 			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
308 			    || (S_ISLNK(sb.st_mode) &&
309 			    (g_stat(pathbuf, &sb, pglob) == 0) &&
310 			    S_ISDIR(sb.st_mode)))) {
311 				*pathend++ = SEP;
312 				*pathend = EOS;
313 			}
314 			++pglob->gl_matchc;
315 			return(globextend(pathbuf, pglob));
316 		}
317 
318 		/* Find end of next segment, copy tentatively to pathend. */
319 		q = pathend;
320 		p = pattern;
321 		while (*p != EOS && *p != SEP) {
322 			if (ismeta(*p))
323 				anymeta = 1;
324 			*q++ = *p++;
325 		}
326 
327 		if (!anymeta) {		/* No expansion, do next segment. */
328 			pathend = q;
329 			pattern = p;
330 			while (*pattern == SEP)
331 				*pathend++ = *pattern++;
332 		} else			/* Need expansion, recurse. */
333 			return(glob3(pathbuf, pathend, pattern, p, pglob));
334 	}
335 	/* NOTREACHED */
336 }
337 
338 static
339 glob3(pathbuf, pathend, pattern, restpattern, pglob)
340 	Char *pathbuf, *pathend, *pattern, *restpattern;
341 	glob_t *pglob;
342 {
343 	register struct dirent *dp;
344 	struct dirent *(*readdirfunc)();
345 	DIR *dirp;
346 	int len, err;
347 	char buf[MAXPATHLEN];
348 
349 	*pathend = EOS;
350 	errno = 0;
351 
352 	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
353 		/* TODO: don't call for ENOENT or ENOTDIR? */
354 		if (pglob->gl_errfunc) {
355 			g_Ctoc(pathbuf, buf);
356 			if (pglob->gl_errfunc(buf, errno) ||
357 			    pglob->gl_flags & GLOB_ERR)
358 				return (GLOB_ABEND);
359 		}
360 		return(0);
361 	}
362 
363 	err = 0;
364 
365 	/* Search directory for matching names. */
366 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
367 		readdirfunc = pglob->gl_readdir;
368 	else
369 		readdirfunc = readdir;
370 	while ((dp = (*readdirfunc)(dirp))) {
371 		register u_char *sc;
372 		register Char *dc;
373 
374 		/* Initial DOT must be matched literally. */
375 		if (dp->d_name[0] == DOT && *pattern != DOT)
376 			continue;
377 		for (sc = (u_char *) dp->d_name, dc = pathend;
378 		     *dc++ = *sc++;);
379 		if (!match(pathend, pattern, restpattern)) {
380 			*pathend = EOS;
381 			continue;
382 		}
383 		err = glob2(pathbuf, --dc, restpattern, pglob);
384 		if (err)
385 			break;
386 	}
387 
388 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
389 		(*pglob->gl_closedir)(dirp);
390 	else
391 		closedir(dirp);
392 	return(err);
393 }
394 
395 
396 /*
397  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
398  * add the new item, and update gl_pathc.
399  *
400  * This assumes the BSD realloc, which only copies the block when its size
401  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
402  * behavior.
403  *
404  * Return 0 if new item added, error code if memory couldn't be allocated.
405  *
406  * Invariant of the glob_t structure:
407  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
408  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
409  */
410 static int
411 globextend(path, pglob)
412 	Char *path;
413 	glob_t *pglob;
414 {
415 	register char **pathv;
416 	register int i;
417 	u_int newsize;
418 	char *copy;
419 	Char *p;
420 
421 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
422 	pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
423 	if (pathv == NULL)
424 		return(GLOB_NOSPACE);
425 
426 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
427 		/* first time around -- clear initial gl_offs items */
428 		pathv += pglob->gl_offs;
429 		for (i = pglob->gl_offs; --i >= 0; )
430 			*--pathv = NULL;
431 	}
432 	pglob->gl_pathv = pathv;
433 
434 	for (p = path; *p++;);
435 	if ((copy = malloc(p - path)) != NULL) {
436 		g_Ctoc(path, copy);
437 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
438 	}
439 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
440 	return(copy == NULL ? GLOB_NOSPACE : 0);
441 }
442 
443 
444 /*
445  * pattern matching function for filenames.  Each occurrence of the *
446  * pattern causes a recursion level.
447  */
448 static
449 match(name, pat, patend)
450 	register Char *name, *pat, *patend;
451 {
452 	int ok, negate_range;
453 	Char c, k;
454 
455 	while (pat < patend) {
456 		c = *pat++;
457 		switch (c & M_MASK) {
458 		case M_ALL:
459 			if (pat == patend)
460 				return(1);
461 			do
462 			    if (match(name, pat, patend))
463 				    return(1);
464 			while (*name++ != EOS);
465 			return(0);
466 		case M_ONE:
467 			if (*name++ == EOS)
468 				return(0);
469 			break;
470 		case M_SET:
471 			ok = 0;
472 			if ((k = *name++) == EOS)
473 				return(0);
474 			if (negate_range = ((*pat & M_MASK) == M_NOT))
475 				++pat;
476 			while (((c = *pat++) & M_MASK) != M_END)
477 				if ((*pat & M_MASK) == M_RNG) {
478 					if (c <= k && k <= pat[1])
479 						ok = 1;
480 					pat += 2;
481 				} else if (c == k)
482 					ok = 1;
483 			if (ok == negate_range)
484 				return(0);
485 			break;
486 		default:
487 			if (*name++ != c)
488 				return(0);
489 			break;
490 		}
491 	}
492 	return(*name == EOS);
493 }
494 
495 /* Free allocated data belonging to a glob_t structure. */
496 void
497 globfree(pglob)
498 	glob_t *pglob;
499 {
500 	register int i;
501 	register char **pp;
502 
503 	if (pglob->gl_pathv != NULL) {
504 		pp = pglob->gl_pathv + pglob->gl_offs;
505 		for (i = pglob->gl_pathc; i--; ++pp)
506 			if (*pp)
507 				free(*pp);
508 		free(pglob->gl_pathv);
509 	}
510 }
511 
512 static DIR *
513 g_opendir(str, pglob)
514 	register Char *str;
515 	glob_t *pglob;
516 {
517 	char buf[MAXPATHLEN];
518 	char *dirname;
519 
520 	if (!*str)
521 		strcpy(buf, ".");
522 	else
523 		g_Ctoc(str, buf);
524 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
525 		return((*pglob->gl_opendir)(buf));
526 	return(opendir(buf));
527 }
528 
529 static int
530 g_lstat(fn, sb, pglob)
531 	register Char *fn;
532 	struct stat *sb;
533 	glob_t *pglob;
534 {
535 	char buf[MAXPATHLEN];
536 
537 	g_Ctoc(fn, buf);
538 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
539 		return((*pglob->gl_lstat)(buf, sb));
540 	return(lstat(buf, sb));
541 }
542 
543 static int
544 g_stat(fn, sb, pglob)
545 	register Char *fn;
546 	struct stat *sb;
547 	glob_t *pglob;
548 {
549 	char buf[MAXPATHLEN];
550 
551 	g_Ctoc(fn, buf);
552 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
553 		return((*pglob->gl_stat)(buf, sb));
554 	return(stat(buf, sb));
555 }
556 
557 static Char *
558 g_strchr(str, ch)
559 	Char *str;
560 	int ch;
561 {
562 	do {
563 		if (*str == ch)
564 			return (str);
565 	} while (*str++);
566 	return (NULL);
567 }
568 
569 static void
570 g_Ctoc(str, buf)
571 	register Char *str;
572 	char *buf;
573 {
574 	register char *dc;
575 
576 	for (dc = buf; *dc++ = *str++;);
577 }
578 
579 #ifdef DEBUG
580 static void
581 qprintf(s)
582 	register Char *s;
583 {
584 	register Char *p;
585 
586 	for (p = s; *p; p++)
587 		(void)printf("%c", CHAR(*p));
588 	(void)printf("\n");
589 	for (p = s; *p; p++)
590 		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
591 	(void)printf("\n");
592 	for (p = s; *p; p++)
593 		(void)printf("%c", ismeta(*p) ? '_' : ' ');
594 	(void)printf("\n");
595 }
596 #endif
597