xref: /minix3/external/bsd/libarchive/dist/libarchive_fe/pathmatch.c (revision 543adbed3a3a783ed36434adafbc258b6bde442d)
1*543adbedSBen Gras /*-
2*543adbedSBen Gras  * Copyright (c) 2003-2007 Tim Kientzle
3*543adbedSBen Gras  * All rights reserved.
4*543adbedSBen Gras  *
5*543adbedSBen Gras  * Redistribution and use in source and binary forms, with or without
6*543adbedSBen Gras  * modification, are permitted provided that the following conditions
7*543adbedSBen Gras  * are met:
8*543adbedSBen Gras  * 1. Redistributions of source code must retain the above copyright
9*543adbedSBen Gras  *    notice, this list of conditions and the following disclaimer
10*543adbedSBen Gras  *    in this position and unchanged.
11*543adbedSBen Gras  * 2. Redistributions in binary form must reproduce the above copyright
12*543adbedSBen Gras  *    notice, this list of conditions and the following disclaimer in the
13*543adbedSBen Gras  *    documentation and/or other materials provided with the distribution.
14*543adbedSBen Gras  *
15*543adbedSBen Gras  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16*543adbedSBen Gras  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17*543adbedSBen Gras  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18*543adbedSBen Gras  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19*543adbedSBen Gras  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20*543adbedSBen Gras  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21*543adbedSBen Gras  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22*543adbedSBen Gras  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23*543adbedSBen Gras  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24*543adbedSBen Gras  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25*543adbedSBen Gras  */
26*543adbedSBen Gras 
27*543adbedSBen Gras #include "lafe_platform.h"
28*543adbedSBen Gras __FBSDID("$FreeBSD$");
29*543adbedSBen Gras 
30*543adbedSBen Gras #ifdef HAVE_STRING_H
31*543adbedSBen Gras #include <string.h>
32*543adbedSBen Gras #endif
33*543adbedSBen Gras 
34*543adbedSBen Gras #include "pathmatch.h"
35*543adbedSBen Gras 
36*543adbedSBen Gras /*
37*543adbedSBen Gras  * Check whether a character 'c' is matched by a list specification [...]:
38*543adbedSBen Gras  *    * Leading '!' negates the class.
39*543adbedSBen Gras  *    * <char>-<char> is a range of characters
40*543adbedSBen Gras  *    * \<char> removes any special meaning for <char>
41*543adbedSBen Gras  *
42*543adbedSBen Gras  * Some interesting boundary cases:
43*543adbedSBen Gras  *   a-d-e is one range (a-d) followed by two single characters - and e.
44*543adbedSBen Gras  *   \a-\d is same as a-d
45*543adbedSBen Gras  *   a\-d is three single characters: a, d, -
46*543adbedSBen Gras  *   Trailing - is not special (so [a-] is two characters a and -).
47*543adbedSBen Gras  *   Initial - is not special ([a-] is same as [-a] is same as [\\-a])
48*543adbedSBen Gras  *   This function never sees a trailing \.
49*543adbedSBen Gras  *   [] always fails
50*543adbedSBen Gras  *   [!] always succeeds
51*543adbedSBen Gras  */
52*543adbedSBen Gras static int
pm_list(const char * start,const char * end,const char c,int flags)53*543adbedSBen Gras pm_list(const char *start, const char *end, const char c, int flags)
54*543adbedSBen Gras {
55*543adbedSBen Gras 	const char *p = start;
56*543adbedSBen Gras 	char rangeStart = '\0', nextRangeStart;
57*543adbedSBen Gras 	int match = 1, nomatch = 0;
58*543adbedSBen Gras 
59*543adbedSBen Gras 	/* This will be used soon... */
60*543adbedSBen Gras 	(void)flags; /* UNUSED */
61*543adbedSBen Gras 
62*543adbedSBen Gras 	/* If this is a negated class, return success for nomatch. */
63*543adbedSBen Gras 	if (*p == '!' && p < end) {
64*543adbedSBen Gras 		match = 0;
65*543adbedSBen Gras 		nomatch = 1;
66*543adbedSBen Gras 		++p;
67*543adbedSBen Gras 	}
68*543adbedSBen Gras 
69*543adbedSBen Gras 	while (p < end) {
70*543adbedSBen Gras 		nextRangeStart = '\0';
71*543adbedSBen Gras 		switch (*p) {
72*543adbedSBen Gras 		case '-':
73*543adbedSBen Gras 			/* Trailing or initial '-' is not special. */
74*543adbedSBen Gras 			if ((rangeStart == '\0') || (p == end - 1)) {
75*543adbedSBen Gras 				if (*p == c)
76*543adbedSBen Gras 					return (match);
77*543adbedSBen Gras 			} else {
78*543adbedSBen Gras 				char rangeEnd = *++p;
79*543adbedSBen Gras 				if (rangeEnd == '\\')
80*543adbedSBen Gras 					rangeEnd = *++p;
81*543adbedSBen Gras 				if ((rangeStart <= c) && (c <= rangeEnd))
82*543adbedSBen Gras 					return (match);
83*543adbedSBen Gras 			}
84*543adbedSBen Gras 			break;
85*543adbedSBen Gras 		case '\\':
86*543adbedSBen Gras 			++p;
87*543adbedSBen Gras 			/* Fall through */
88*543adbedSBen Gras 		default:
89*543adbedSBen Gras 			if (*p == c)
90*543adbedSBen Gras 				return (match);
91*543adbedSBen Gras 			nextRangeStart = *p; /* Possible start of range. */
92*543adbedSBen Gras 		}
93*543adbedSBen Gras 		rangeStart = nextRangeStart;
94*543adbedSBen Gras 		++p;
95*543adbedSBen Gras 	}
96*543adbedSBen Gras 	return (nomatch);
97*543adbedSBen Gras }
98*543adbedSBen Gras 
99*543adbedSBen Gras /*
100*543adbedSBen Gras  * If s is pointing to "./", ".//", "./././" or the like, skip it.
101*543adbedSBen Gras  */
102*543adbedSBen Gras static const char *
pm_slashskip(const char * s)103*543adbedSBen Gras pm_slashskip(const char *s) {
104*543adbedSBen Gras 	while ((*s == '/')
105*543adbedSBen Gras 	    || (s[0] == '.' && s[1] == '/')
106*543adbedSBen Gras 	    || (s[0] == '.' && s[1] == '\0'))
107*543adbedSBen Gras 		++s;
108*543adbedSBen Gras 	return (s);
109*543adbedSBen Gras }
110*543adbedSBen Gras 
111*543adbedSBen Gras static int
pm(const char * p,const char * s,int flags)112*543adbedSBen Gras pm(const char *p, const char *s, int flags)
113*543adbedSBen Gras {
114*543adbedSBen Gras 	const char *end;
115*543adbedSBen Gras 
116*543adbedSBen Gras 	/*
117*543adbedSBen Gras 	 * Ignore leading './', './/', '././', etc.
118*543adbedSBen Gras 	 */
119*543adbedSBen Gras 	if (s[0] == '.' && s[1] == '/')
120*543adbedSBen Gras 		s = pm_slashskip(s + 1);
121*543adbedSBen Gras 	if (p[0] == '.' && p[1] == '/')
122*543adbedSBen Gras 		p = pm_slashskip(p + 1);
123*543adbedSBen Gras 
124*543adbedSBen Gras 	for (;;) {
125*543adbedSBen Gras 		switch (*p) {
126*543adbedSBen Gras 		case '\0':
127*543adbedSBen Gras 			if (s[0] == '/') {
128*543adbedSBen Gras 				if (flags & PATHMATCH_NO_ANCHOR_END)
129*543adbedSBen Gras 					return (1);
130*543adbedSBen Gras 				/* "dir" == "dir/" == "dir/." */
131*543adbedSBen Gras 				s = pm_slashskip(s);
132*543adbedSBen Gras 			}
133*543adbedSBen Gras 			return (*s == '\0');
134*543adbedSBen Gras 		case '?':
135*543adbedSBen Gras 			/* ? always succeds, unless we hit end of 's' */
136*543adbedSBen Gras 			if (*s == '\0')
137*543adbedSBen Gras 				return (0);
138*543adbedSBen Gras 			break;
139*543adbedSBen Gras 		case '*':
140*543adbedSBen Gras 			/* "*" == "**" == "***" ... */
141*543adbedSBen Gras 			while (*p == '*')
142*543adbedSBen Gras 				++p;
143*543adbedSBen Gras 			/* Trailing '*' always succeeds. */
144*543adbedSBen Gras 			if (*p == '\0')
145*543adbedSBen Gras 				return (1);
146*543adbedSBen Gras 			while (*s) {
147*543adbedSBen Gras 				if (lafe_pathmatch(p, s, flags))
148*543adbedSBen Gras 					return (1);
149*543adbedSBen Gras 				++s;
150*543adbedSBen Gras 			}
151*543adbedSBen Gras 			return (0);
152*543adbedSBen Gras 		case '[':
153*543adbedSBen Gras 			/*
154*543adbedSBen Gras 			 * Find the end of the [...] character class,
155*543adbedSBen Gras 			 * ignoring \] that might occur within the class.
156*543adbedSBen Gras 			 */
157*543adbedSBen Gras 			end = p + 1;
158*543adbedSBen Gras 			while (*end != '\0' && *end != ']') {
159*543adbedSBen Gras 				if (*end == '\\' && end[1] != '\0')
160*543adbedSBen Gras 					++end;
161*543adbedSBen Gras 				++end;
162*543adbedSBen Gras 			}
163*543adbedSBen Gras 			if (*end == ']') {
164*543adbedSBen Gras 				/* We found [...], try to match it. */
165*543adbedSBen Gras 				if (!pm_list(p + 1, end, *s, flags))
166*543adbedSBen Gras 					return (0);
167*543adbedSBen Gras 				p = end; /* Jump to trailing ']' char. */
168*543adbedSBen Gras 				break;
169*543adbedSBen Gras 			} else
170*543adbedSBen Gras 				/* No final ']', so just match '['. */
171*543adbedSBen Gras 				if (*p != *s)
172*543adbedSBen Gras 					return (0);
173*543adbedSBen Gras 			break;
174*543adbedSBen Gras 		case '\\':
175*543adbedSBen Gras 			/* Trailing '\\' matches itself. */
176*543adbedSBen Gras 			if (p[1] == '\0') {
177*543adbedSBen Gras 				if (*s != '\\')
178*543adbedSBen Gras 					return (0);
179*543adbedSBen Gras 			} else {
180*543adbedSBen Gras 				++p;
181*543adbedSBen Gras 				if (*p != *s)
182*543adbedSBen Gras 					return (0);
183*543adbedSBen Gras 			}
184*543adbedSBen Gras 			break;
185*543adbedSBen Gras 		case '/':
186*543adbedSBen Gras 			if (*s != '/' && *s != '\0')
187*543adbedSBen Gras 				return (0);
188*543adbedSBen Gras 			/* Note: pattern "/\./" won't match "/";
189*543adbedSBen Gras 			 * pm_slashskip() correctly stops at backslash. */
190*543adbedSBen Gras 			p = pm_slashskip(p);
191*543adbedSBen Gras 			s = pm_slashskip(s);
192*543adbedSBen Gras 			if (*p == '\0' && (flags & PATHMATCH_NO_ANCHOR_END))
193*543adbedSBen Gras 				return (1);
194*543adbedSBen Gras 			--p; /* Counteract the increment below. */
195*543adbedSBen Gras 			--s;
196*543adbedSBen Gras 			break;
197*543adbedSBen Gras 		case '$':
198*543adbedSBen Gras 			/* '$' is special only at end of pattern and only
199*543adbedSBen Gras 			 * if PATHMATCH_NO_ANCHOR_END is specified. */
200*543adbedSBen Gras 			if (p[1] == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
201*543adbedSBen Gras 				/* "dir" == "dir/" == "dir/." */
202*543adbedSBen Gras 				return (*pm_slashskip(s) == '\0');
203*543adbedSBen Gras 			}
204*543adbedSBen Gras 			/* Otherwise, '$' is not special. */
205*543adbedSBen Gras 			/* FALL THROUGH */
206*543adbedSBen Gras 		default:
207*543adbedSBen Gras 			if (*p != *s)
208*543adbedSBen Gras 				return (0);
209*543adbedSBen Gras 			break;
210*543adbedSBen Gras 		}
211*543adbedSBen Gras 		++p;
212*543adbedSBen Gras 		++s;
213*543adbedSBen Gras 	}
214*543adbedSBen Gras }
215*543adbedSBen Gras 
216*543adbedSBen Gras /* Main entry point. */
217*543adbedSBen Gras int
lafe_pathmatch(const char * p,const char * s,int flags)218*543adbedSBen Gras lafe_pathmatch(const char *p, const char *s, int flags)
219*543adbedSBen Gras {
220*543adbedSBen Gras 	/* Empty pattern only matches the empty string. */
221*543adbedSBen Gras 	if (p == NULL || *p == '\0')
222*543adbedSBen Gras 		return (s == NULL || *s == '\0');
223*543adbedSBen Gras 
224*543adbedSBen Gras 	/* Leading '^' anchors the start of the pattern. */
225*543adbedSBen Gras 	if (*p == '^') {
226*543adbedSBen Gras 		++p;
227*543adbedSBen Gras 		flags &= ~PATHMATCH_NO_ANCHOR_START;
228*543adbedSBen Gras 	}
229*543adbedSBen Gras 
230*543adbedSBen Gras 	if (*p == '/' && *s != '/')
231*543adbedSBen Gras 		return (0);
232*543adbedSBen Gras 
233*543adbedSBen Gras 	/* Certain patterns and file names anchor implicitly. */
234*543adbedSBen Gras 	if (*p == '*' || *p == '/' || *p == '/') {
235*543adbedSBen Gras 		while (*p == '/')
236*543adbedSBen Gras 			++p;
237*543adbedSBen Gras 		while (*s == '/')
238*543adbedSBen Gras 			++s;
239*543adbedSBen Gras 		return (pm(p, s, flags));
240*543adbedSBen Gras 	}
241*543adbedSBen Gras 
242*543adbedSBen Gras 	/* If start is unanchored, try to match start of each path element. */
243*543adbedSBen Gras 	if (flags & PATHMATCH_NO_ANCHOR_START) {
244*543adbedSBen Gras 		for ( ; s != NULL; s = strchr(s, '/')) {
245*543adbedSBen Gras 			if (*s == '/')
246*543adbedSBen Gras 				s++;
247*543adbedSBen Gras 			if (pm(p, s, flags))
248*543adbedSBen Gras 				return (1);
249*543adbedSBen Gras 		}
250*543adbedSBen Gras 		return (0);
251*543adbedSBen Gras 	}
252*543adbedSBen Gras 
253*543adbedSBen Gras 	/* Default: Match from beginning. */
254*543adbedSBen Gras 	return (pm(p, s, flags));
255*543adbedSBen Gras }
256