1 /* $OpenBSD: rmatch.c,v 1.4 2024/02/20 10:36:23 claudio Exp $ */
2
3 /*
4 * Copyright (c) 2021 Claudio Jeker <claudio@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 /*
20 * Copyright (c) 1989, 1993, 1994
21 * The Regents of the University of California. All rights reserved.
22 *
23 * This code is derived from software contributed to Berkeley by
24 * Guido van Rossum.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
28 * are met:
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must reproduce the above copyright
32 * notice, this list of conditions and the following disclaimer in the
33 * documentation and/or other materials provided with the distribution.
34 * 3. Neither the name of the University nor the names of its contributors
35 * may be used to endorse or promote products derived from this software
36 * without specific prior written permission.
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
39 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
42 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48 * SUCH DAMAGE.
49 */
50
51 #include <ctype.h>
52 #include <stdio.h>
53 #include <string.h>
54 #include <limits.h>
55
56 #include "charclass.h"
57 #include "extern.h"
58
59 #define RANGE_MATCH 1
60 #define RANGE_NOMATCH 0
61 #define RANGE_ERROR (-1)
62
63 static int
classmatch(const char * pattern,char test,const char ** ep)64 classmatch(const char *pattern, char test, const char **ep)
65 {
66 const char *mismatch = pattern;
67 const struct cclass *cc;
68 const char *colon;
69 size_t len;
70 int rval = RANGE_NOMATCH;
71
72 if (*pattern++ != ':') {
73 *ep = mismatch;
74 return RANGE_ERROR;
75 }
76 if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
77 *ep = mismatch;
78 return RANGE_ERROR;
79 }
80 *ep = colon + 2;
81 len = (size_t)(colon - pattern);
82
83 for (cc = cclasses; cc->name != NULL; cc++) {
84 if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
85 if (cc->isctype((unsigned char)test))
86 rval = RANGE_MATCH;
87 return rval;
88 }
89 }
90
91 /* invalid character class, treat as normal text */
92 *ep = mismatch;
93 return RANGE_ERROR;
94 }
95
96 static int
rangematch(const char ** pp,char test)97 rangematch(const char **pp, char test)
98 {
99 const char *pattern = *pp;
100 int negate, ok;
101 char c, c2;
102
103 /*
104 * A bracket expression starting with an unquoted circumflex
105 * character produces unspecified results (IEEE 1003.2-1992,
106 * 3.13.2). This implementation treats it like '!', for
107 * consistency with the regular expression syntax.
108 * J.T. Conklin (conklin@ngai.kaleida.com)
109 */
110 if ((negate = (*pattern == '!' || *pattern == '^')))
111 ++pattern;
112
113 /*
114 * A right bracket shall lose its special meaning and represent
115 * itself in a bracket expression if it occurs first in the list.
116 * -- POSIX.2 2.8.3.2
117 */
118 ok = 0;
119 c = *pattern++;
120 do {
121 if (c == '[') {
122 switch (classmatch(pattern, test, &pattern)) {
123 case RANGE_MATCH:
124 ok = 1;
125 continue;
126 case RANGE_NOMATCH:
127 continue;
128 default:
129 /* invalid character class, treat literally. */
130 break;
131 }
132 }
133 if (c == '\\')
134 c = *pattern++;
135 if (c == '\0')
136 return RANGE_ERROR;
137 /* patterns can not match on '/' */
138 if (c == '/')
139 return RANGE_NOMATCH;
140 if (*pattern == '-'
141 && (c2 = *(pattern + 1)) != '\0' && c2 != ']') {
142 pattern += 2;
143 if (c2 == '\\')
144 c2 = *pattern++;
145 if (c2 == '\0')
146 return RANGE_ERROR;
147 if (c <= test && test <= c2)
148 ok = 1;
149 } else if (c == test)
150 ok = 1;
151 } while ((c = *pattern++) != ']');
152
153 *pp = pattern;
154 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
155 }
156
157 /*
158 * Single character match, advances pattern as much as needed.
159 * Return 0 on match and !0 (aka 1) on mismatch.
160 * When matched pp is advanced to the end of the pattern matched.
161 */
162 static int
matchchar(const char ** pp,const char in)163 matchchar(const char **pp, const char in)
164 {
165 const char *pattern = *pp;
166 char c;
167 int rv = 0;
168
169 switch (c = *pattern++) {
170 case '?':
171 if (in == '\0')
172 rv = 1;
173 if (in == '/')
174 rv = 1;
175 break;
176 case '[':
177 if (in == '\0')
178 rv = 1;
179 if (in == '/')
180 rv = 1;
181 if (rv == 1)
182 break;
183
184 switch (rangematch(&pattern, in)) {
185 case RANGE_ERROR:
186 /* not a good range, treat as normal text */
187 goto normal;
188 case RANGE_MATCH:
189 break;
190 case RANGE_NOMATCH:
191 rv = 1;
192 }
193 break;
194 case '\\':
195 if ((c = *pattern++) == '\0') {
196 c = '\\';
197 --pattern;
198 }
199 /* FALLTHROUGH */
200 default:
201 normal:
202 if (c != in)
203 rv = 1;
204 break;
205 }
206
207 *pp = pattern;
208 return rv;
209 }
210
211 /*
212 * Do a substring match. If wild is set then the pattern started with a '*'.
213 * The match will go until '*', '/' or '\0' is encountered in pattern or
214 * the input string is consumed up to end.
215 * The pattern and string handles pp and ss are updated only on success.
216 */
217 static int
matchsub(const char ** pp,const char ** ss,const char * end,int wild)218 matchsub(const char **pp, const char **ss, const char *end, int wild)
219 {
220 const char *pattern = *pp;
221 const char *p = pattern;
222 const char *string = *ss;
223 size_t matchlen;
224
225 /* first calculate how many characters the submatch will consume */
226 for (matchlen = 0; *p != '\0'; matchlen++) {
227 if (p[0] == '*')
228 break;
229 /* '/' acts as barrier */
230 if (p[0] == '/' || (p[0] == '\\' && p[1] == '/')) {
231 if (wild) {
232 /* match needs to match up to end of segment */
233 if (string > end - matchlen)
234 return 1;
235 string = end - matchlen;
236 wild = 0;
237 }
238 break;
239 }
240 /*
241 * skip forward one character in pattern by doing a
242 * dummy lookup.
243 */
244 matchchar(&p, ' ');
245 }
246
247 /* not enough char to match */
248 if (string > end - matchlen)
249 return 1;
250
251 if (*p == '\0') {
252 if (wild) {
253 /* match needs to match up to end of segment */
254 string = end - matchlen;
255 wild = 0;
256 }
257 }
258
259 while (*pattern != '\0' && *pattern != '*') {
260 /* eat possible escape char before '/' */
261 if (pattern[0] == '\\' && pattern[1] == '/')
262 pattern++;
263 if (pattern[0] == '/') {
264 /* hit the barrier but we still have characters left */
265 if (string < end)
266 return 1;
267 break;
268 }
269
270 /* check if there are still characters available to compare */
271 if (string >= end)
272 return 1;
273 /* Compare one char at a time. */
274 if (!matchchar(&pattern, *string++))
275 continue;
276 if (wild) {
277 /* skip forward one char and restart match */
278 string = ++*ss;
279 pattern = *pp;
280 /* can it still match? */
281 if (string > end - matchlen)
282 return 1;
283 } else {
284 /* failed match */
285 return 1;
286 }
287 }
288
289 *pp = pattern;
290 *ss = string;
291 return 0;
292 }
293
294 /*
295 * File matching with the addition of the special '**'.
296 * Returns 0 on match and !0 for strings that do not match pattern.
297 */
298 int
rmatch(const char * pattern,const char * string,int leading_dir)299 rmatch(const char *pattern, const char *string, int leading_dir)
300 {
301 const char *segend, *segnext, *mismatch = NULL;
302 int wild, starstar;
303
304 while (*pattern && *string) {
305
306 /* handle leading '/' first */
307 if (pattern[0] == '\\' && pattern[1] == '/')
308 pattern++;
309 if (*string == '/' && *pattern == '/') {
310 string++;
311 pattern++;
312 }
313
314 /* match to the next '/' in string */
315 segend = strchr(string, '/');
316 if (segend == NULL)
317 segend = strchr(string, '\0');
318
319 while (*pattern) {
320 /*
321 * Check for '*' and '**'. For '*' reduce '*' and '?'
322 * sequences into n-'?' and trailing '*'.
323 * For '**' this optimisation can not be done
324 * since '**???/' will match 'a/aa/aaa/' but not
325 * 'a/aa/aa/' still additional '*' will be reduced.
326 */
327 wild = 0;
328 starstar = 0;
329 for ( ; *pattern == '*' || *pattern == '?'; pattern++) {
330 if (pattern[0] == '*') {
331 if (pattern[1] == '*') {
332 starstar = 1;
333 pattern++;
334 }
335 wild = 1;
336 } else if (!starstar) { /* pattern[0] == '?' */
337 if (string < segend && *string != '/')
338 string++;
339 else
340 /* no match possible */
341 return 1;
342 } else
343 break;
344 }
345
346 /* pattern ends in '**' so it is a match */
347 if (starstar && *pattern == '\0')
348 return 0;
349
350 if (starstar) {
351 segnext = segend;
352 mismatch = pattern;
353 }
354
355 while (string < segend) {
356 if (matchsub(&pattern, &string, segend, wild)) {
357 failed_match:
358 /*
359 * failed to match, if starstar retry
360 * with the next segment.
361 */
362 if (mismatch) {
363 pattern = mismatch;
364 wild = 1;
365 string = segnext;
366 if (*string == '/')
367 string++;
368 segend = strchr(string, '/');
369 if (!segend)
370 segend = strchr(string,
371 '\0');
372 segnext = segend;
373 if (string < segend)
374 continue;
375 }
376 /* no match possible */
377 return 1;
378 }
379 break;
380 }
381
382 /* at end of string segment, eat up any extra '*' */
383 if (string >= segend && *pattern != '*')
384 break;
385 }
386 if (*string != '\0' && *string != '/')
387 goto failed_match;
388 if (*pattern != '\0' && *pattern != '/')
389 goto failed_match;
390 }
391
392 /* if both pattern and string are consumed it was a match */
393 if (*pattern == '\0' && *string == '\0')
394 return 0;
395 /* if leading_dir is set then string can also be '/' for success */
396 if (leading_dir && *pattern == '\0' && *string == '/')
397 return 0;
398 /* else failure */
399 return 1;
400 }
401