1*936f2fafSclaudio /* $OpenBSD: rmatch.c,v 1.4 2024/02/20 10:36:23 claudio Exp $ */
257987d16Sclaudio
357987d16Sclaudio /*
457987d16Sclaudio * Copyright (c) 2021 Claudio Jeker <claudio@openbsd.org>
557987d16Sclaudio *
657987d16Sclaudio * Permission to use, copy, modify, and distribute this software for any
757987d16Sclaudio * purpose with or without fee is hereby granted, provided that the above
857987d16Sclaudio * copyright notice and this permission notice appear in all copies.
957987d16Sclaudio *
1057987d16Sclaudio * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1157987d16Sclaudio * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1257987d16Sclaudio * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1357987d16Sclaudio * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1457987d16Sclaudio * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1557987d16Sclaudio * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1657987d16Sclaudio * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1757987d16Sclaudio */
1857987d16Sclaudio
1957987d16Sclaudio /*
2057987d16Sclaudio * Copyright (c) 1989, 1993, 1994
2157987d16Sclaudio * The Regents of the University of California. All rights reserved.
2257987d16Sclaudio *
2357987d16Sclaudio * This code is derived from software contributed to Berkeley by
2457987d16Sclaudio * Guido van Rossum.
2557987d16Sclaudio *
2657987d16Sclaudio * Redistribution and use in source and binary forms, with or without
2757987d16Sclaudio * modification, are permitted provided that the following conditions
2857987d16Sclaudio * are met:
2957987d16Sclaudio * 1. Redistributions of source code must retain the above copyright
3057987d16Sclaudio * notice, this list of conditions and the following disclaimer.
3157987d16Sclaudio * 2. Redistributions in binary form must reproduce the above copyright
3257987d16Sclaudio * notice, this list of conditions and the following disclaimer in the
3357987d16Sclaudio * documentation and/or other materials provided with the distribution.
3457987d16Sclaudio * 3. Neither the name of the University nor the names of its contributors
3557987d16Sclaudio * may be used to endorse or promote products derived from this software
3657987d16Sclaudio * without specific prior written permission.
3757987d16Sclaudio *
3857987d16Sclaudio * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
3957987d16Sclaudio * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
4057987d16Sclaudio * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
4157987d16Sclaudio * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
4257987d16Sclaudio * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
4357987d16Sclaudio * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
4457987d16Sclaudio * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
4557987d16Sclaudio * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
4657987d16Sclaudio * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
4757987d16Sclaudio * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
4857987d16Sclaudio * SUCH DAMAGE.
4957987d16Sclaudio */
5057987d16Sclaudio
5157987d16Sclaudio #include <ctype.h>
5257987d16Sclaudio #include <stdio.h>
5357987d16Sclaudio #include <string.h>
5457987d16Sclaudio #include <limits.h>
5557987d16Sclaudio
5657987d16Sclaudio #include "charclass.h"
57722539fdSclaudio #include "extern.h"
5857987d16Sclaudio
5957987d16Sclaudio #define RANGE_MATCH 1
6057987d16Sclaudio #define RANGE_NOMATCH 0
6157987d16Sclaudio #define RANGE_ERROR (-1)
6257987d16Sclaudio
6357987d16Sclaudio static int
classmatch(const char * pattern,char test,const char ** ep)6457987d16Sclaudio classmatch(const char *pattern, char test, const char **ep)
6557987d16Sclaudio {
6657987d16Sclaudio const char *mismatch = pattern;
6757987d16Sclaudio const struct cclass *cc;
6857987d16Sclaudio const char *colon;
6957987d16Sclaudio size_t len;
7057987d16Sclaudio int rval = RANGE_NOMATCH;
7157987d16Sclaudio
7257987d16Sclaudio if (*pattern++ != ':') {
7357987d16Sclaudio *ep = mismatch;
7457987d16Sclaudio return RANGE_ERROR;
7557987d16Sclaudio }
7657987d16Sclaudio if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
7757987d16Sclaudio *ep = mismatch;
7857987d16Sclaudio return RANGE_ERROR;
7957987d16Sclaudio }
8057987d16Sclaudio *ep = colon + 2;
8157987d16Sclaudio len = (size_t)(colon - pattern);
8257987d16Sclaudio
8357987d16Sclaudio for (cc = cclasses; cc->name != NULL; cc++) {
8457987d16Sclaudio if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
8557987d16Sclaudio if (cc->isctype((unsigned char)test))
8657987d16Sclaudio rval = RANGE_MATCH;
8757987d16Sclaudio return rval;
8857987d16Sclaudio }
8957987d16Sclaudio }
9057987d16Sclaudio
9157987d16Sclaudio /* invalid character class, treat as normal text */
9257987d16Sclaudio *ep = mismatch;
9357987d16Sclaudio return RANGE_ERROR;
9457987d16Sclaudio }
9557987d16Sclaudio
9657987d16Sclaudio static int
rangematch(const char ** pp,char test)9757987d16Sclaudio rangematch(const char **pp, char test)
9857987d16Sclaudio {
9957987d16Sclaudio const char *pattern = *pp;
10057987d16Sclaudio int negate, ok;
10157987d16Sclaudio char c, c2;
10257987d16Sclaudio
10357987d16Sclaudio /*
10457987d16Sclaudio * A bracket expression starting with an unquoted circumflex
10557987d16Sclaudio * character produces unspecified results (IEEE 1003.2-1992,
10657987d16Sclaudio * 3.13.2). This implementation treats it like '!', for
10757987d16Sclaudio * consistency with the regular expression syntax.
10857987d16Sclaudio * J.T. Conklin (conklin@ngai.kaleida.com)
10957987d16Sclaudio */
11057987d16Sclaudio if ((negate = (*pattern == '!' || *pattern == '^')))
11157987d16Sclaudio ++pattern;
11257987d16Sclaudio
11357987d16Sclaudio /*
11457987d16Sclaudio * A right bracket shall lose its special meaning and represent
11557987d16Sclaudio * itself in a bracket expression if it occurs first in the list.
11657987d16Sclaudio * -- POSIX.2 2.8.3.2
11757987d16Sclaudio */
11857987d16Sclaudio ok = 0;
11957987d16Sclaudio c = *pattern++;
12057987d16Sclaudio do {
12157987d16Sclaudio if (c == '[') {
12257987d16Sclaudio switch (classmatch(pattern, test, &pattern)) {
12357987d16Sclaudio case RANGE_MATCH:
12457987d16Sclaudio ok = 1;
12557987d16Sclaudio continue;
12657987d16Sclaudio case RANGE_NOMATCH:
12757987d16Sclaudio continue;
12857987d16Sclaudio default:
129d9a51c35Sjmc /* invalid character class, treat literally. */
13057987d16Sclaudio break;
13157987d16Sclaudio }
13257987d16Sclaudio }
13357987d16Sclaudio if (c == '\\')
13457987d16Sclaudio c = *pattern++;
13557987d16Sclaudio if (c == '\0')
13657987d16Sclaudio return RANGE_ERROR;
13757987d16Sclaudio /* patterns can not match on '/' */
13857987d16Sclaudio if (c == '/')
13957987d16Sclaudio return RANGE_NOMATCH;
14057987d16Sclaudio if (*pattern == '-'
14157987d16Sclaudio && (c2 = *(pattern + 1)) != '\0' && c2 != ']') {
14257987d16Sclaudio pattern += 2;
14357987d16Sclaudio if (c2 == '\\')
14457987d16Sclaudio c2 = *pattern++;
14557987d16Sclaudio if (c2 == '\0')
14657987d16Sclaudio return RANGE_ERROR;
14757987d16Sclaudio if (c <= test && test <= c2)
14857987d16Sclaudio ok = 1;
14957987d16Sclaudio } else if (c == test)
15057987d16Sclaudio ok = 1;
15157987d16Sclaudio } while ((c = *pattern++) != ']');
15257987d16Sclaudio
15357987d16Sclaudio *pp = pattern;
15457987d16Sclaudio return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
15557987d16Sclaudio }
15657987d16Sclaudio
15757987d16Sclaudio /*
15857987d16Sclaudio * Single character match, advances pattern as much as needed.
159d9a51c35Sjmc * Return 0 on match and !0 (aka 1) on mismatch.
16057987d16Sclaudio * When matched pp is advanced to the end of the pattern matched.
16157987d16Sclaudio */
16257987d16Sclaudio static int
matchchar(const char ** pp,const char in)16357987d16Sclaudio matchchar(const char **pp, const char in)
16457987d16Sclaudio {
16557987d16Sclaudio const char *pattern = *pp;
16657987d16Sclaudio char c;
16757987d16Sclaudio int rv = 0;
16857987d16Sclaudio
16957987d16Sclaudio switch (c = *pattern++) {
17057987d16Sclaudio case '?':
17157987d16Sclaudio if (in == '\0')
17257987d16Sclaudio rv = 1;
17357987d16Sclaudio if (in == '/')
17457987d16Sclaudio rv = 1;
17557987d16Sclaudio break;
17657987d16Sclaudio case '[':
17757987d16Sclaudio if (in == '\0')
17857987d16Sclaudio rv = 1;
17957987d16Sclaudio if (in == '/')
18057987d16Sclaudio rv = 1;
18157987d16Sclaudio if (rv == 1)
18257987d16Sclaudio break;
18357987d16Sclaudio
18457987d16Sclaudio switch (rangematch(&pattern, in)) {
18557987d16Sclaudio case RANGE_ERROR:
18657987d16Sclaudio /* not a good range, treat as normal text */
18757987d16Sclaudio goto normal;
18857987d16Sclaudio case RANGE_MATCH:
18957987d16Sclaudio break;
19057987d16Sclaudio case RANGE_NOMATCH:
19157987d16Sclaudio rv = 1;
19257987d16Sclaudio }
19357987d16Sclaudio break;
19457987d16Sclaudio case '\\':
19557987d16Sclaudio if ((c = *pattern++) == '\0') {
19657987d16Sclaudio c = '\\';
19757987d16Sclaudio --pattern;
19857987d16Sclaudio }
19957987d16Sclaudio /* FALLTHROUGH */
20057987d16Sclaudio default:
20157987d16Sclaudio normal:
20257987d16Sclaudio if (c != in)
20357987d16Sclaudio rv = 1;
20457987d16Sclaudio break;
20557987d16Sclaudio }
20657987d16Sclaudio
20757987d16Sclaudio *pp = pattern;
20857987d16Sclaudio return rv;
20957987d16Sclaudio }
21057987d16Sclaudio
21157987d16Sclaudio /*
21257987d16Sclaudio * Do a substring match. If wild is set then the pattern started with a '*'.
21357987d16Sclaudio * The match will go until '*', '/' or '\0' is encountered in pattern or
21457987d16Sclaudio * the input string is consumed up to end.
21557987d16Sclaudio * The pattern and string handles pp and ss are updated only on success.
21657987d16Sclaudio */
21757987d16Sclaudio static int
matchsub(const char ** pp,const char ** ss,const char * end,int wild)21857987d16Sclaudio matchsub(const char **pp, const char **ss, const char *end, int wild)
21957987d16Sclaudio {
22057987d16Sclaudio const char *pattern = *pp;
22157987d16Sclaudio const char *p = pattern;
22257987d16Sclaudio const char *string = *ss;
22357987d16Sclaudio size_t matchlen;
22457987d16Sclaudio
22557987d16Sclaudio /* first calculate how many characters the submatch will consume */
22657987d16Sclaudio for (matchlen = 0; *p != '\0'; matchlen++) {
22757987d16Sclaudio if (p[0] == '*')
22857987d16Sclaudio break;
22957987d16Sclaudio /* '/' acts as barrier */
23057987d16Sclaudio if (p[0] == '/' || (p[0] == '\\' && p[1] == '/')) {
23157987d16Sclaudio if (wild) {
23257987d16Sclaudio /* match needs to match up to end of segment */
23357987d16Sclaudio if (string > end - matchlen)
23457987d16Sclaudio return 1;
23557987d16Sclaudio string = end - matchlen;
23657987d16Sclaudio wild = 0;
23757987d16Sclaudio }
23857987d16Sclaudio break;
23957987d16Sclaudio }
24057987d16Sclaudio /*
24157987d16Sclaudio * skip forward one character in pattern by doing a
24257987d16Sclaudio * dummy lookup.
24357987d16Sclaudio */
24457987d16Sclaudio matchchar(&p, ' ');
24557987d16Sclaudio }
24657987d16Sclaudio
24757987d16Sclaudio /* not enough char to match */
24857987d16Sclaudio if (string > end - matchlen)
24957987d16Sclaudio return 1;
25057987d16Sclaudio
25157987d16Sclaudio if (*p == '\0') {
25257987d16Sclaudio if (wild) {
25357987d16Sclaudio /* match needs to match up to end of segment */
25457987d16Sclaudio string = end - matchlen;
25557987d16Sclaudio wild = 0;
25657987d16Sclaudio }
25757987d16Sclaudio }
25857987d16Sclaudio
25957987d16Sclaudio while (*pattern != '\0' && *pattern != '*') {
26057987d16Sclaudio /* eat possible escape char before '/' */
26157987d16Sclaudio if (pattern[0] == '\\' && pattern[1] == '/')
26257987d16Sclaudio pattern++;
263*936f2fafSclaudio if (pattern[0] == '/') {
264*936f2fafSclaudio /* hit the barrier but we still have characters left */
265*936f2fafSclaudio if (string < end)
266*936f2fafSclaudio return 1;
26757987d16Sclaudio break;
268*936f2fafSclaudio }
26957987d16Sclaudio
27057987d16Sclaudio /* check if there are still characters available to compare */
27157987d16Sclaudio if (string >= end)
27257987d16Sclaudio return 1;
27357987d16Sclaudio /* Compare one char at a time. */
27457987d16Sclaudio if (!matchchar(&pattern, *string++))
27557987d16Sclaudio continue;
27657987d16Sclaudio if (wild) {
27757987d16Sclaudio /* skip forward one char and restart match */
27857987d16Sclaudio string = ++*ss;
27957987d16Sclaudio pattern = *pp;
28057987d16Sclaudio /* can it still match? */
28157987d16Sclaudio if (string > end - matchlen)
28257987d16Sclaudio return 1;
28357987d16Sclaudio } else {
28457987d16Sclaudio /* failed match */
28557987d16Sclaudio return 1;
28657987d16Sclaudio }
28757987d16Sclaudio }
28857987d16Sclaudio
28957987d16Sclaudio *pp = pattern;
29057987d16Sclaudio *ss = string;
29157987d16Sclaudio return 0;
29257987d16Sclaudio }
29357987d16Sclaudio
29457987d16Sclaudio /*
29557987d16Sclaudio * File matching with the addition of the special '**'.
29657987d16Sclaudio * Returns 0 on match and !0 for strings that do not match pattern.
29757987d16Sclaudio */
29857987d16Sclaudio int
rmatch(const char * pattern,const char * string,int leading_dir)29957987d16Sclaudio rmatch(const char *pattern, const char *string, int leading_dir)
30057987d16Sclaudio {
30157987d16Sclaudio const char *segend, *segnext, *mismatch = NULL;
30257987d16Sclaudio int wild, starstar;
30357987d16Sclaudio
30457987d16Sclaudio while (*pattern && *string) {
30557987d16Sclaudio
30657987d16Sclaudio /* handle leading '/' first */
30757987d16Sclaudio if (pattern[0] == '\\' && pattern[1] == '/')
30857987d16Sclaudio pattern++;
30957987d16Sclaudio if (*string == '/' && *pattern == '/') {
31057987d16Sclaudio string++;
31157987d16Sclaudio pattern++;
31257987d16Sclaudio }
31357987d16Sclaudio
31457987d16Sclaudio /* match to the next '/' in string */
31557987d16Sclaudio segend = strchr(string, '/');
31657987d16Sclaudio if (segend == NULL)
31757987d16Sclaudio segend = strchr(string, '\0');
31857987d16Sclaudio
31957987d16Sclaudio while (*pattern) {
32057987d16Sclaudio /*
32157987d16Sclaudio * Check for '*' and '**'. For '*' reduce '*' and '?'
32257987d16Sclaudio * sequences into n-'?' and trailing '*'.
32357987d16Sclaudio * For '**' this optimisation can not be done
32457987d16Sclaudio * since '**???/' will match 'a/aa/aaa/' but not
32557987d16Sclaudio * 'a/aa/aa/' still additional '*' will be reduced.
32657987d16Sclaudio */
32757987d16Sclaudio wild = 0;
32857987d16Sclaudio starstar = 0;
32957987d16Sclaudio for ( ; *pattern == '*' || *pattern == '?'; pattern++) {
33057987d16Sclaudio if (pattern[0] == '*') {
33157987d16Sclaudio if (pattern[1] == '*') {
33257987d16Sclaudio starstar = 1;
33357987d16Sclaudio pattern++;
33457987d16Sclaudio }
33557987d16Sclaudio wild = 1;
33657987d16Sclaudio } else if (!starstar) { /* pattern[0] == '?' */
33757987d16Sclaudio if (string < segend && *string != '/')
33857987d16Sclaudio string++;
33957987d16Sclaudio else
34057987d16Sclaudio /* no match possible */
34157987d16Sclaudio return 1;
34257987d16Sclaudio } else
34357987d16Sclaudio break;
34457987d16Sclaudio }
34557987d16Sclaudio
34657987d16Sclaudio /* pattern ends in '**' so it is a match */
34757987d16Sclaudio if (starstar && *pattern == '\0')
34857987d16Sclaudio return 0;
34957987d16Sclaudio
35057987d16Sclaudio if (starstar) {
35157987d16Sclaudio segnext = segend;
35257987d16Sclaudio mismatch = pattern;
35357987d16Sclaudio }
35457987d16Sclaudio
35557987d16Sclaudio while (string < segend) {
35657987d16Sclaudio if (matchsub(&pattern, &string, segend, wild)) {
35757987d16Sclaudio failed_match:
35857987d16Sclaudio /*
35957987d16Sclaudio * failed to match, if starstar retry
36057987d16Sclaudio * with the next segment.
36157987d16Sclaudio */
36257987d16Sclaudio if (mismatch) {
36357987d16Sclaudio pattern = mismatch;
36457987d16Sclaudio wild = 1;
36557987d16Sclaudio string = segnext;
36657987d16Sclaudio if (*string == '/')
36757987d16Sclaudio string++;
36857987d16Sclaudio segend = strchr(string, '/');
36957987d16Sclaudio if (!segend)
37057987d16Sclaudio segend = strchr(string,
37157987d16Sclaudio '\0');
37257987d16Sclaudio segnext = segend;
37357987d16Sclaudio if (string < segend)
37457987d16Sclaudio continue;
37557987d16Sclaudio }
37657987d16Sclaudio /* no match possible */
37757987d16Sclaudio return 1;
37857987d16Sclaudio }
37957987d16Sclaudio break;
38057987d16Sclaudio }
38157987d16Sclaudio
38257987d16Sclaudio /* at end of string segment, eat up any extra '*' */
38357987d16Sclaudio if (string >= segend && *pattern != '*')
38457987d16Sclaudio break;
38557987d16Sclaudio }
38657987d16Sclaudio if (*string != '\0' && *string != '/')
38757987d16Sclaudio goto failed_match;
38857987d16Sclaudio if (*pattern != '\0' && *pattern != '/')
38957987d16Sclaudio goto failed_match;
39057987d16Sclaudio }
39157987d16Sclaudio
39257987d16Sclaudio /* if both pattern and string are consumed it was a match */
39357987d16Sclaudio if (*pattern == '\0' && *string == '\0')
39457987d16Sclaudio return 0;
39557987d16Sclaudio /* if leading_dir is set then string can also be '/' for success */
39657987d16Sclaudio if (leading_dir && *pattern == '\0' && *string == '/')
39757987d16Sclaudio return 0;
39857987d16Sclaudio /* else failure */
39957987d16Sclaudio return 1;
40057987d16Sclaudio }
401