xref: /openbsd-src/usr.sbin/httpd/patterns.c (revision 3fbbbb84db587aa93d86e38ba80e763351a866db)
1*3fbbbb84Ssemarie /*	$OpenBSD: patterns.c,v 1.5 2016/02/14 18:20:59 semarie Exp $	*/
259355b5aSreyk 
359355b5aSreyk /*
459355b5aSreyk  * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
559355b5aSreyk  * Copyright (C) 1994-2015 Lua.org, PUC-Rio.
659355b5aSreyk  *
759355b5aSreyk  * Permission is hereby granted, free of charge, to any person obtaining
859355b5aSreyk  * a copy of this software and associated documentation files (the
959355b5aSreyk  * "Software"), to deal in the Software without restriction, including
1059355b5aSreyk  * without limitation the rights to use, copy, modify, merge, publish,
1159355b5aSreyk  * distribute, sublicense, and/or sell copies of the Software, and to
1259355b5aSreyk  * permit persons to whom the Software is furnished to do so, subject to
1359355b5aSreyk  * the following conditions:
1459355b5aSreyk  *
1559355b5aSreyk  * The above copyright notice and this permission notice shall be
1659355b5aSreyk  * included in all copies or substantial portions of the Software.
1759355b5aSreyk  *
1859355b5aSreyk  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1959355b5aSreyk  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
2059355b5aSreyk  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
2159355b5aSreyk  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
2259355b5aSreyk  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
2359355b5aSreyk  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
2459355b5aSreyk  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2559355b5aSreyk  */
2659355b5aSreyk 
2759355b5aSreyk /*
2859355b5aSreyk  * Derived from Lua 5.3.1:
29*3fbbbb84Ssemarie  * $Id: patterns.c,v 1.5 2016/02/14 18:20:59 semarie Exp $
3059355b5aSreyk  * Standard library for string operations and pattern-matching
3159355b5aSreyk  */
3259355b5aSreyk 
3325d98250Ssemarie #include <sys/types.h>
3425d98250Ssemarie #include <ctype.h>
3525d98250Ssemarie #include <errno.h>
3659355b5aSreyk #include <stddef.h>
3759355b5aSreyk #include <stdlib.h>
3859355b5aSreyk #include <string.h>
3959355b5aSreyk 
4059355b5aSreyk #include "patterns.h"
4159355b5aSreyk 
4259355b5aSreyk #define uchar(c)	((unsigned char)(c)) /* macro to 'unsign' a char */
4359355b5aSreyk #define CAP_UNFINISHED	(-1)
4459355b5aSreyk #define CAP_POSITION	(-2)
4559355b5aSreyk #define L_ESC		'%'
4659355b5aSreyk #define SPECIALS	"^$*+?.([%-"
4759355b5aSreyk 
4859355b5aSreyk struct match_state {
4959355b5aSreyk 	int matchdepth;		/* control for recursive depth (to avoid C
5059355b5aSreyk 				 * stack overflow) */
5159355b5aSreyk 	int repetitioncounter;	/* control the repetition items */
5259355b5aSreyk 	int maxcaptures;	/* configured capture limit */
5359355b5aSreyk 	const char *src_init;	/* init of source string */
5459355b5aSreyk 	const char *src_end;	/* end ('\0') of source string */
5559355b5aSreyk 	const char *p_end;	/* end ('\0') of pattern */
5659355b5aSreyk 	const char *error;	/* should be NULL */
5759355b5aSreyk 	int level;		/* total number of captures (finished or
5859355b5aSreyk 				 * unfinished) */
5959355b5aSreyk 	struct {
6059355b5aSreyk 		const char *init;
6159355b5aSreyk 		ptrdiff_t len;
6259355b5aSreyk 	} capture[MAXCAPTURES];
6359355b5aSreyk };
6459355b5aSreyk 
6559355b5aSreyk /* recursive function */
6659355b5aSreyk static const char *match(struct match_state *, const char *, const char *);
6759355b5aSreyk 
6859355b5aSreyk static int
match_error(struct match_state * ms,const char * error)6959355b5aSreyk match_error(struct match_state *ms, const char *error)
7059355b5aSreyk {
7159355b5aSreyk 	ms->error = ms->error == NULL ? error : ms->error;
7259355b5aSreyk 	return (-1);
7359355b5aSreyk }
7459355b5aSreyk 
7559355b5aSreyk static int
check_capture(struct match_state * ms,int l)7659355b5aSreyk check_capture(struct match_state *ms, int l)
7759355b5aSreyk {
7859355b5aSreyk 	l -= '1';
7959355b5aSreyk 	if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
8059355b5aSreyk 		return match_error(ms, "invalid capture index");
8159355b5aSreyk 	return (l);
8259355b5aSreyk }
8359355b5aSreyk 
8459355b5aSreyk static int
capture_to_close(struct match_state * ms)8559355b5aSreyk capture_to_close(struct match_state *ms)
8659355b5aSreyk {
8759355b5aSreyk 	int level = ms->level;
8859355b5aSreyk 	for (level--; level >= 0; level--)
8959355b5aSreyk 		if (ms->capture[level].len == CAP_UNFINISHED)
9059355b5aSreyk 			return (level);
9159355b5aSreyk 	return match_error(ms, "invalid pattern capture");
9259355b5aSreyk }
9359355b5aSreyk 
9459355b5aSreyk static const char *
classend(struct match_state * ms,const char * p)9559355b5aSreyk classend(struct match_state *ms, const char *p)
9659355b5aSreyk {
9759355b5aSreyk 	switch (*p++) {
9859355b5aSreyk 	case L_ESC:
9959355b5aSreyk 		if (p == ms->p_end)
10059355b5aSreyk 			match_error(ms,
10125d98250Ssemarie 			    "malformed pattern (ends with '%')");
10259355b5aSreyk 		return p + 1;
10359355b5aSreyk 	case '[':
10459355b5aSreyk 		if (*p == '^')
10559355b5aSreyk 			p++;
10659355b5aSreyk 		do {
10759355b5aSreyk 			/* look for a ']' */
10859355b5aSreyk 			if (p == ms->p_end) {
10959355b5aSreyk 				match_error(ms,
11059355b5aSreyk 				    "malformed pattern (missing ']')");
11159355b5aSreyk 				break;
11259355b5aSreyk 			}
11359355b5aSreyk 			if (*(p++) == L_ESC && p < ms->p_end) {
11459355b5aSreyk 				/* skip escapes (e.g. '%]') */
11559355b5aSreyk 				p++;
11659355b5aSreyk 			}
11759355b5aSreyk 		} while (*p != ']');
11859355b5aSreyk 		return p + 1;
11959355b5aSreyk 	default:
12059355b5aSreyk 		return p;
12159355b5aSreyk 	}
12259355b5aSreyk }
12359355b5aSreyk 
12459355b5aSreyk static int
match_class(int c,int cl)12559355b5aSreyk match_class(int c, int cl)
12659355b5aSreyk {
12759355b5aSreyk 	int res;
12859355b5aSreyk 	switch (tolower(cl)) {
12959355b5aSreyk 	case 'a':
13059355b5aSreyk 		res = isalpha(c);
13159355b5aSreyk 		break;
13259355b5aSreyk 	case 'c':
13359355b5aSreyk 		res = iscntrl(c);
13459355b5aSreyk 		break;
13559355b5aSreyk 	case 'd':
13659355b5aSreyk 		res = isdigit(c);
13759355b5aSreyk 		break;
13859355b5aSreyk 	case 'g':
13959355b5aSreyk 		res = isgraph(c);
14059355b5aSreyk 		break;
14159355b5aSreyk 	case 'l':
14259355b5aSreyk 		res = islower(c);
14359355b5aSreyk 		break;
14459355b5aSreyk 	case 'p':
14559355b5aSreyk 		res = ispunct(c);
14659355b5aSreyk 		break;
14759355b5aSreyk 	case 's':
14859355b5aSreyk 		res = isspace(c);
14959355b5aSreyk 		break;
15059355b5aSreyk 	case 'u':
15159355b5aSreyk 		res = isupper(c);
15259355b5aSreyk 		break;
15359355b5aSreyk 	case 'w':
15459355b5aSreyk 		res = isalnum(c);
15559355b5aSreyk 		break;
15659355b5aSreyk 	case 'x':
15759355b5aSreyk 		res = isxdigit(c);
15859355b5aSreyk 		break;
15959355b5aSreyk 	default:
16059355b5aSreyk 		return (cl == c);
16159355b5aSreyk 	}
16259355b5aSreyk 	return (islower(cl) ? res : !res);
16359355b5aSreyk }
16459355b5aSreyk 
16559355b5aSreyk static int
matchbracketclass(int c,const char * p,const char * ec)16659355b5aSreyk matchbracketclass(int c, const char *p, const char *ec)
16759355b5aSreyk {
16859355b5aSreyk 	int sig = 1;
16959355b5aSreyk 	if (*(p + 1) == '^') {
17059355b5aSreyk 		sig = 0;
17159355b5aSreyk 		/* skip the '^' */
17259355b5aSreyk 		p++;
17359355b5aSreyk 	}
17459355b5aSreyk 	while (++p < ec) {
17559355b5aSreyk 		if (*p == L_ESC) {
17659355b5aSreyk 			p++;
17759355b5aSreyk 			if (match_class(c, uchar(*p)))
17859355b5aSreyk 				return sig;
17959355b5aSreyk 		} else if ((*(p + 1) == '-') && (p + 2 < ec)) {
18059355b5aSreyk 			p += 2;
18159355b5aSreyk 			if (uchar(*(p - 2)) <= c && c <= uchar(*p))
18259355b5aSreyk 				return sig;
18359355b5aSreyk 		} else if (uchar(*p) == c)
18459355b5aSreyk 			return sig;
18559355b5aSreyk 	}
18659355b5aSreyk 	return !sig;
18759355b5aSreyk }
18859355b5aSreyk 
18959355b5aSreyk static int
singlematch(struct match_state * ms,const char * s,const char * p,const char * ep)19059355b5aSreyk singlematch(struct match_state *ms, const char *s, const char *p,
19159355b5aSreyk     const char *ep)
19259355b5aSreyk {
19359355b5aSreyk 	if (s >= ms->src_end)
19459355b5aSreyk 		return 0;
19559355b5aSreyk 	else {
19659355b5aSreyk 		int c = uchar(*s);
19759355b5aSreyk 		switch (*p) {
19859355b5aSreyk 		case '.':
19959355b5aSreyk 			/* matches any char */
20059355b5aSreyk 			return (1);
20159355b5aSreyk 		case L_ESC:
20259355b5aSreyk 			return match_class(c, uchar(*(p + 1)));
20359355b5aSreyk 		case '[':
20459355b5aSreyk 			return matchbracketclass(c, p, ep - 1);
20559355b5aSreyk 		default:
20659355b5aSreyk 			return (uchar(*p) == c);
20759355b5aSreyk 		}
20859355b5aSreyk 	}
20959355b5aSreyk }
21059355b5aSreyk 
21159355b5aSreyk static const char *
matchbalance(struct match_state * ms,const char * s,const char * p)21259355b5aSreyk matchbalance(struct match_state *ms, const char *s, const char *p)
21359355b5aSreyk {
21459355b5aSreyk 	if (p >= ms->p_end - 1) {
21559355b5aSreyk 		match_error(ms,
21659355b5aSreyk 		    "malformed pattern (missing arguments to '%b')");
21759355b5aSreyk 		return (NULL);
21859355b5aSreyk 	}
21959355b5aSreyk 	if (*s != *p)
22059355b5aSreyk 		return (NULL);
22159355b5aSreyk 	else {
22259355b5aSreyk 		int b = *p;
22359355b5aSreyk 		int e = *(p + 1);
22459355b5aSreyk 		int cont = 1;
22559355b5aSreyk 		while (++s < ms->src_end) {
22659355b5aSreyk 			if (*s == e) {
22759355b5aSreyk 				if (--cont == 0)
22859355b5aSreyk 					return s + 1;
22959355b5aSreyk 			} else if (*s == b)
23059355b5aSreyk 				cont++;
23159355b5aSreyk 		}
23259355b5aSreyk 	}
23359355b5aSreyk 
23459355b5aSreyk 	/* string ends out of balance */
23559355b5aSreyk 	return (NULL);
23659355b5aSreyk }
23759355b5aSreyk 
23859355b5aSreyk static const char *
max_expand(struct match_state * ms,const char * s,const char * p,const char * ep)23959355b5aSreyk max_expand(struct match_state *ms, const char *s, const char *p, const char *ep)
24059355b5aSreyk {
24159355b5aSreyk 	ptrdiff_t i = 0;
24259355b5aSreyk 	/* counts maximum expand for item */
24359355b5aSreyk 	while (singlematch(ms, s + i, p, ep))
24459355b5aSreyk 		i++;
24559355b5aSreyk 	/* keeps trying to match with the maximum repetitions */
24659355b5aSreyk 	while (i >= 0) {
24759355b5aSreyk 		const char *res = match(ms, (s + i), ep + 1);
24859355b5aSreyk 		if (res)
24959355b5aSreyk 			return res;
25059355b5aSreyk 		/* else didn't match; reduce 1 repetition to try again */
25159355b5aSreyk 		i--;
25259355b5aSreyk 	}
25359355b5aSreyk 	return NULL;
25459355b5aSreyk }
25559355b5aSreyk 
25659355b5aSreyk static const char *
min_expand(struct match_state * ms,const char * s,const char * p,const char * ep)25759355b5aSreyk min_expand(struct match_state *ms, const char *s, const char *p, const char *ep)
25859355b5aSreyk {
25959355b5aSreyk 	for (;;) {
26059355b5aSreyk 		const char *res = match(ms, s, ep + 1);
26159355b5aSreyk 		if (res != NULL)
26259355b5aSreyk 			return res;
26359355b5aSreyk 		else if (singlematch(ms, s, p, ep))
26459355b5aSreyk 			s++;	/* try with one more repetition */
26559355b5aSreyk 		else
26659355b5aSreyk 			return NULL;
26759355b5aSreyk 	}
26859355b5aSreyk }
26959355b5aSreyk 
27059355b5aSreyk static const char *
start_capture(struct match_state * ms,const char * s,const char * p,int what)27159355b5aSreyk start_capture(struct match_state *ms, const char *s, const char *p, int what)
27259355b5aSreyk {
27359355b5aSreyk 	const char *res;
27459355b5aSreyk 
27559355b5aSreyk 	int level = ms->level;
27659355b5aSreyk 	if (level >= ms->maxcaptures) {
27759355b5aSreyk 		match_error(ms, "too many captures");
27859355b5aSreyk 		return (NULL);
27959355b5aSreyk 	}
28059355b5aSreyk 	ms->capture[level].init = s;
28159355b5aSreyk 	ms->capture[level].len = what;
28259355b5aSreyk 	ms->level = level + 1;
28359355b5aSreyk 	/* undo capture if match failed */
28459355b5aSreyk 	if ((res = match(ms, s, p)) == NULL)
28559355b5aSreyk 		ms->level--;
28659355b5aSreyk 	return res;
28759355b5aSreyk }
28859355b5aSreyk 
28959355b5aSreyk static const char *
end_capture(struct match_state * ms,const char * s,const char * p)29059355b5aSreyk end_capture(struct match_state *ms, const char *s, const char *p)
29159355b5aSreyk {
29259355b5aSreyk 	int l = capture_to_close(ms);
29359355b5aSreyk 	const char *res;
29459355b5aSreyk 	if (l == -1)
29559355b5aSreyk 		return NULL;
29659355b5aSreyk 	/* close capture */
29759355b5aSreyk 	ms->capture[l].len = s - ms->capture[l].init;
29859355b5aSreyk 	/* undo capture if match failed */
29959355b5aSreyk 	if ((res = match(ms, s, p)) == NULL)
30059355b5aSreyk 		ms->capture[l].len = CAP_UNFINISHED;
30159355b5aSreyk 	return res;
30259355b5aSreyk }
30359355b5aSreyk 
30459355b5aSreyk static const char *
match_capture(struct match_state * ms,const char * s,int l)30559355b5aSreyk match_capture(struct match_state *ms, const char *s, int l)
30659355b5aSreyk {
30759355b5aSreyk 	size_t len;
30859355b5aSreyk 	l = check_capture(ms, l);
30959355b5aSreyk 	if (l == -1)
31059355b5aSreyk 		return NULL;
31159355b5aSreyk 	len = ms->capture[l].len;
31259355b5aSreyk 	if ((size_t) (ms->src_end - s) >= len &&
31359355b5aSreyk 	    memcmp(ms->capture[l].init, s, len) == 0)
31459355b5aSreyk 		return s + len;
31559355b5aSreyk 	else
31659355b5aSreyk 		return NULL;
31759355b5aSreyk }
31859355b5aSreyk 
31959355b5aSreyk static const char *
match(struct match_state * ms,const char * s,const char * p)32059355b5aSreyk match(struct match_state *ms, const char *s, const char *p)
32159355b5aSreyk {
32259355b5aSreyk 	const char *ep, *res;
32359355b5aSreyk 	char previous;
32459355b5aSreyk 
32559355b5aSreyk 	if (ms->matchdepth-- == 0) {
32659355b5aSreyk 		match_error(ms, "pattern too complex");
32759355b5aSreyk 		return (NULL);
32859355b5aSreyk 	}
32959355b5aSreyk 
33059355b5aSreyk 	/* using goto's to optimize tail recursion */
33159355b5aSreyk  init:
33259355b5aSreyk 	/* end of pattern? */
33359355b5aSreyk 	if (p != ms->p_end) {
33459355b5aSreyk 		switch (*p) {
33559355b5aSreyk 		case '(':
33659355b5aSreyk 			/* start capture */
33759355b5aSreyk 			if (*(p + 1) == ')')
33859355b5aSreyk 				/* position capture? */
33959355b5aSreyk 				s = start_capture(ms, s, p + 2, CAP_POSITION);
34059355b5aSreyk 			else
34159355b5aSreyk 				s = start_capture(ms, s, p + 1, CAP_UNFINISHED);
34259355b5aSreyk 			break;
34359355b5aSreyk 		case ')':
34459355b5aSreyk 			/* end capture */
34559355b5aSreyk 			s = end_capture(ms, s, p + 1);
34659355b5aSreyk 			break;
34759355b5aSreyk 		case '$':
34859355b5aSreyk 			/* is the '$' the last char in pattern? */
34959355b5aSreyk 			if ((p + 1) != ms->p_end) {
35059355b5aSreyk 				/* no; go to default */
35159355b5aSreyk 				goto dflt;
35259355b5aSreyk 			}
35359355b5aSreyk 			 /* check end of string */
35459355b5aSreyk 			s = (s == ms->src_end) ? s : NULL;
35559355b5aSreyk 			break;
35659355b5aSreyk 		case L_ESC:
35759355b5aSreyk 			/* escaped sequences not in the format class[*+?-]? */
35859355b5aSreyk 			switch (*(p + 1)) {
35959355b5aSreyk 			case 'b':
36059355b5aSreyk 				/* balanced string? */
36159355b5aSreyk 				s = matchbalance(ms, s, p + 2);
36259355b5aSreyk 				if (s != NULL) {
36359355b5aSreyk 					p += 4;
36459355b5aSreyk 					/* return match(ms, s, p + 4); */
36559355b5aSreyk 					goto init;
36659355b5aSreyk 				} /* else fail (s == NULL) */
36759355b5aSreyk 				break;
36859355b5aSreyk 			case 'f':
36959355b5aSreyk 				/* frontier? */
37059355b5aSreyk 				p += 2;
37159355b5aSreyk 				if (*p != '[') {
37259355b5aSreyk 					match_error(ms, "missing '['"
37359355b5aSreyk 					    " after '%f' in pattern");
37459355b5aSreyk 					break;
37559355b5aSreyk 				}
37659355b5aSreyk 				/* points to what is next */
37759355b5aSreyk 				ep = classend(ms, p);
37859355b5aSreyk 				if (ms->error != NULL)
37959355b5aSreyk 					break;
38059355b5aSreyk 				previous =
38159355b5aSreyk 				    (s == ms->src_init) ? '\0' : *(s - 1);
38259355b5aSreyk 				if (!matchbracketclass(uchar(previous),
38359355b5aSreyk 				    p, ep - 1) &&
38459355b5aSreyk 				    matchbracketclass(uchar(*s),
38559355b5aSreyk 				    p, ep - 1)) {
38659355b5aSreyk 					p = ep;
38759355b5aSreyk 					/* return match(ms, s, ep); */
38859355b5aSreyk 					goto init;
38959355b5aSreyk 				}
39059355b5aSreyk 				/* match failed */
39159355b5aSreyk 				s = NULL;
39259355b5aSreyk 				break;
39359355b5aSreyk 			case '0':
39459355b5aSreyk 			case '1':
39559355b5aSreyk 			case '2':
39659355b5aSreyk 			case '3':
39759355b5aSreyk 			case '4':
39859355b5aSreyk 			case '5':
39959355b5aSreyk 			case '6':
40059355b5aSreyk 			case '7':
40159355b5aSreyk 			case '8':
40259355b5aSreyk 			case '9':
40359355b5aSreyk 				/* capture results (%0-%9)? */
40459355b5aSreyk 				s = match_capture(ms, s, uchar(*(p + 1)));
40559355b5aSreyk 				if (s != NULL) {
40659355b5aSreyk 					p += 2;
40759355b5aSreyk 					/* return match(ms, s, p + 2) */
40859355b5aSreyk 					goto init;
40959355b5aSreyk 				}
41059355b5aSreyk 				break;
41159355b5aSreyk 			default:
41259355b5aSreyk 				goto dflt;
41359355b5aSreyk 			}
41459355b5aSreyk 			break;
41559355b5aSreyk 		default:
41659355b5aSreyk 
41759355b5aSreyk 			/* pattern class plus optional suffix */
41859355b5aSreyk 	dflt:
41959355b5aSreyk 			/* points to optional suffix */
42059355b5aSreyk 			ep = classend(ms, p);
42159355b5aSreyk 			if (ms->error != NULL)
42259355b5aSreyk 				break;
42359355b5aSreyk 
42459355b5aSreyk 			/* does not match at least once? */
42559355b5aSreyk 			if (!singlematch(ms, s, p, ep)) {
42659355b5aSreyk 				if (ms->repetitioncounter-- == 0) {
42759355b5aSreyk 					match_error(ms, "max repetition items");
42859355b5aSreyk 					s = NULL; /* fail */
42959355b5aSreyk 				/* accept empty? */
43059355b5aSreyk 				} else if
43159355b5aSreyk 				    (*ep == '*' || *ep == '?' || *ep == '-') {
43259355b5aSreyk 					 p = ep + 1;
43359355b5aSreyk 					/* return match(ms, s, ep + 1); */
43459355b5aSreyk 					 goto init;
43559355b5aSreyk 				} else {
43659355b5aSreyk 					/* '+' or no suffix */
43759355b5aSreyk 					s = NULL; /* fail */
43859355b5aSreyk 				}
43959355b5aSreyk 			} else {
44059355b5aSreyk 				/* matched once */
44159355b5aSreyk 				/* handle optional suffix */
44259355b5aSreyk 				switch (*ep) {
44359355b5aSreyk 				case '?':
44459355b5aSreyk 					/* optional */
44559355b5aSreyk 					if ((res =
44659355b5aSreyk 					    match(ms, s + 1, ep + 1)) != NULL)
44759355b5aSreyk 						s = res;
44859355b5aSreyk 					else {
44959355b5aSreyk 						/*
45059355b5aSreyk 						 * else return
45159355b5aSreyk 						 *     match(ms, s, ep + 1);
45259355b5aSreyk 						 */
45359355b5aSreyk 						p = ep + 1;
45459355b5aSreyk 						goto init;
45559355b5aSreyk 					}
45659355b5aSreyk 					break;
45759355b5aSreyk 				case '+':
45859355b5aSreyk 					/* 1 or more repetitions */
45959355b5aSreyk 					s++; /* 1 match already done */
46059355b5aSreyk 					/* FALLTHROUGH */
46159355b5aSreyk 				case '*':
46259355b5aSreyk 					/* 0 or more repetitions */
46359355b5aSreyk 					s = max_expand(ms, s, p, ep);
46459355b5aSreyk 					break;
46559355b5aSreyk 				case '-':
46659355b5aSreyk 					/* 0 or more repetitions (minimum) */
46759355b5aSreyk 					s = min_expand(ms, s, p, ep);
46859355b5aSreyk 					break;
46959355b5aSreyk 				default:
47059355b5aSreyk 					/* no suffix */
47159355b5aSreyk 					s++;
47259355b5aSreyk 					p = ep;
47359355b5aSreyk 					/* return match(ms, s + 1, ep); */
47459355b5aSreyk 					goto init;
47559355b5aSreyk 				}
47659355b5aSreyk 			}
47759355b5aSreyk 			break;
47859355b5aSreyk 		}
47959355b5aSreyk 	}
48059355b5aSreyk 	ms->matchdepth++;
48159355b5aSreyk 	return s;
48259355b5aSreyk }
48359355b5aSreyk 
48459355b5aSreyk static const char *
lmemfind(const char * s1,size_t l1,const char * s2,size_t l2)48559355b5aSreyk lmemfind(const char *s1, size_t l1,
48659355b5aSreyk     const char *s2, size_t l2)
48759355b5aSreyk {
48859355b5aSreyk 	const char *init;
48959355b5aSreyk 
49059355b5aSreyk 	if (l2 == 0) {
49159355b5aSreyk 		/* empty strings are everywhere */
49259355b5aSreyk 		return (s1);
49359355b5aSreyk 	} else if (l2 > l1) {
49459355b5aSreyk 		/* avoids a negative 'l1' */
49559355b5aSreyk 		return (NULL);
49659355b5aSreyk 	} else {
49759355b5aSreyk 		/*
49859355b5aSreyk 		 * to search for a '*s2' inside 's1'
49959355b5aSreyk 		 * - 1st char will be checked by 'memchr'
50059355b5aSreyk 		 * - 's2' cannot be found after that
50159355b5aSreyk 		 */
50259355b5aSreyk 		l2--;
50359355b5aSreyk 		l1 = l1 - l2;
50459355b5aSreyk 		while (l1 > 0 &&
50559355b5aSreyk 		    (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
50659355b5aSreyk 			/* 1st char is already checked */
50759355b5aSreyk 			init++;
50859355b5aSreyk 			if (memcmp(init, s2 + 1, l2) == 0)
50959355b5aSreyk 				return init - 1;
51059355b5aSreyk 			else {
51159355b5aSreyk 				/* correct 'l1' and 's1' to try again */
51259355b5aSreyk 				l1 -= init - s1;
51359355b5aSreyk 				s1 = init;
51459355b5aSreyk 			}
51559355b5aSreyk 		}
51659355b5aSreyk 		/* not found */
51759355b5aSreyk 		return (NULL);
51859355b5aSreyk 	}
51959355b5aSreyk }
52059355b5aSreyk 
52159355b5aSreyk static int
push_onecapture(struct match_state * ms,int i,const char * s,const char * e,struct str_find * sm)52259355b5aSreyk push_onecapture(struct match_state *ms, int i, const char *s,
52359355b5aSreyk     const char *e, struct str_find *sm)
52459355b5aSreyk {
52559355b5aSreyk 	if (i >= ms->level) {
52659355b5aSreyk 		if (i == 0 || ms->level == 0) {
52759355b5aSreyk 			/* add whole match */
52859355b5aSreyk 			sm->sm_so = (off_t)(s - ms->src_init);
52959355b5aSreyk 			sm->sm_eo = (off_t)(e - s) + sm->sm_so;
53059355b5aSreyk 		} else
53159355b5aSreyk 			return match_error(ms, "invalid capture index");
53259355b5aSreyk 	} else {
53359355b5aSreyk 		ptrdiff_t l = ms->capture[i].len;
53459355b5aSreyk 		if (l == CAP_UNFINISHED)
53559355b5aSreyk 			return match_error(ms, "unfinished capture");
53659355b5aSreyk 		sm->sm_so = ms->capture[i].init - ms->src_init;
53759355b5aSreyk 		sm->sm_eo = sm->sm_so + l;
53859355b5aSreyk 	}
53959355b5aSreyk 	sm->sm_eo = sm->sm_eo < sm->sm_so ? sm->sm_so : sm->sm_eo;
54059355b5aSreyk 	return (0);
54159355b5aSreyk }
54259355b5aSreyk 
54359355b5aSreyk static int
push_captures(struct match_state * ms,const char * s,const char * e,struct str_find * sm,size_t nsm)54459355b5aSreyk push_captures(struct match_state *ms, const char *s, const char *e,
54559355b5aSreyk     struct str_find *sm, size_t nsm)
54659355b5aSreyk {
54759355b5aSreyk 	unsigned int i;
54859355b5aSreyk 	unsigned int nlevels = (ms->level <= 0 && s) ? 1 : ms->level;
54959355b5aSreyk 
55059355b5aSreyk 	if (nlevels > nsm)
55159355b5aSreyk 		nlevels = nsm;
55259355b5aSreyk 	for (i = 0; i < nlevels; i++)
55359355b5aSreyk 		if (push_onecapture(ms, i, s, e, sm + i) == -1)
55459355b5aSreyk 			break;
55559355b5aSreyk 
55659355b5aSreyk 	/* number of strings pushed */
55759355b5aSreyk 	return (nlevels);
55859355b5aSreyk }
55959355b5aSreyk 
56059355b5aSreyk /* check whether pattern has no special characters */
56159355b5aSreyk static int
nospecials(const char * p,size_t l)56259355b5aSreyk nospecials(const char *p, size_t l)
56359355b5aSreyk {
56459355b5aSreyk 	size_t upto = 0;
56559355b5aSreyk 
56659355b5aSreyk 	do {
56759355b5aSreyk 		if (strpbrk(p + upto, SPECIALS)) {
56859355b5aSreyk 			/* pattern has a special character */
56959355b5aSreyk 			return 0;
57059355b5aSreyk 		}
57159355b5aSreyk 		/* may have more after \0 */
57259355b5aSreyk 		upto += strlen(p + upto) + 1;
57359355b5aSreyk 	} while (upto <= l);
57459355b5aSreyk 
57559355b5aSreyk 	/* no special chars found */
57659355b5aSreyk 	return (1);
57759355b5aSreyk }
57859355b5aSreyk 
57959355b5aSreyk static int
str_find_aux(struct match_state * ms,const char * pattern,const char * string,struct str_find * sm,size_t nsm,off_t init)58059355b5aSreyk str_find_aux(struct match_state *ms, const char *pattern, const char *string,
58159355b5aSreyk     struct str_find *sm, size_t nsm, off_t init)
58259355b5aSreyk {
58359355b5aSreyk 	size_t		 ls = strlen(string);
58459355b5aSreyk 	size_t		 lp = strlen(pattern);
58559355b5aSreyk 	const char	*s = string;
58659355b5aSreyk 	const char	*p = pattern;
58759355b5aSreyk 	const char	*s1, *s2;
58859355b5aSreyk 	int		 anchor, i;
58959355b5aSreyk 
59059355b5aSreyk 	if (init < 0)
59159355b5aSreyk 		init = 0;
59259355b5aSreyk 	else if (init > (off_t)ls)
59359355b5aSreyk 		return match_error(ms, "starting after string's end");
59459355b5aSreyk 	s1 = s + init;
59559355b5aSreyk 
59659355b5aSreyk 	if (nospecials(p, lp)) {
59759355b5aSreyk 		/* do a plain search */
59859355b5aSreyk 		s2 = lmemfind(s1, ls - (size_t)init, p, lp);
59959355b5aSreyk 		if (s2 != NULL) {
60059355b5aSreyk 			i = 0;
60159355b5aSreyk 			sm[i].sm_so = 0;
60259355b5aSreyk 			sm[i].sm_eo = ls;
60359355b5aSreyk 			if (nsm > 1) {
60459355b5aSreyk 				i++;
60559355b5aSreyk 				sm[i].sm_so = s2 - s;
60659355b5aSreyk 				sm[i].sm_eo = (s2 - s) + lp;
60759355b5aSreyk 			}
60859355b5aSreyk 			return (i + 1);
60959355b5aSreyk 		}
61059355b5aSreyk 		return (0);
61159355b5aSreyk 	}
61259355b5aSreyk 
61359355b5aSreyk 	anchor = (*p == '^');
61459355b5aSreyk 	if (anchor) {
61559355b5aSreyk 		p++;
61659355b5aSreyk 		lp--;	/* skip anchor character */
61759355b5aSreyk 	}
61859355b5aSreyk 	ms->maxcaptures = (nsm > MAXCAPTURES ? MAXCAPTURES : nsm) - 1;
61959355b5aSreyk 	ms->matchdepth = MAXCCALLS;
62059355b5aSreyk 	ms->repetitioncounter = MAXREPETITION;
62159355b5aSreyk 	ms->src_init = s;
62259355b5aSreyk 	ms->src_end = s + ls;
62359355b5aSreyk 	ms->p_end = p + lp;
62459355b5aSreyk 	do {
62559355b5aSreyk 		const char *res;
62659355b5aSreyk 		ms->level = 0;
62759355b5aSreyk 		if ((res = match(ms, s1, p)) != NULL) {
62859355b5aSreyk 			sm->sm_so = 0;
62959355b5aSreyk 			sm->sm_eo = ls;
63059355b5aSreyk 			return push_captures(ms, s1, res, sm + 1, nsm - 1) + 1;
63159355b5aSreyk 
63259355b5aSreyk 		} else if (ms->error != NULL) {
63359355b5aSreyk 			return 0;
63459355b5aSreyk 		}
63559355b5aSreyk 	} while (s1++ < ms->src_end && !anchor);
63659355b5aSreyk 
63759355b5aSreyk 	return 0;
63859355b5aSreyk }
63959355b5aSreyk 
64059355b5aSreyk int
str_find(const char * string,const char * pattern,struct str_find * sm,size_t nsm,const char ** errstr)64159355b5aSreyk str_find(const char *string, const char *pattern, struct str_find *sm,
64259355b5aSreyk     size_t nsm, const char **errstr)
64359355b5aSreyk {
64459355b5aSreyk 	struct match_state	ms;
64559355b5aSreyk 	int			ret;
64659355b5aSreyk 
64759355b5aSreyk 	memset(&ms, 0, sizeof(ms));
64859355b5aSreyk 	memset(sm, 0, nsm * sizeof(*sm));
64959355b5aSreyk 
65059355b5aSreyk 	ret = str_find_aux(&ms, pattern, string, sm, nsm, 0);
65159355b5aSreyk 	if (ms.error != NULL) {
65259355b5aSreyk 		/* Return 0 on error and store the error string */
65359355b5aSreyk 		*errstr = ms.error;
65459355b5aSreyk 		ret = 0;
65559355b5aSreyk 	} else
65659355b5aSreyk 		*errstr = NULL;
65759355b5aSreyk 
65859355b5aSreyk 	return (ret);
65959355b5aSreyk }
66059355b5aSreyk 
66159355b5aSreyk int
str_match(const char * string,const char * pattern,struct str_match * m,const char ** errstr)66259355b5aSreyk str_match(const char *string, const char *pattern, struct str_match *m,
66359355b5aSreyk     const char **errstr)
66459355b5aSreyk {
66559355b5aSreyk 	struct str_find		 sm[MAXCAPTURES];
66659355b5aSreyk 	struct match_state	 ms;
66759355b5aSreyk 	int			 ret, i;
66859355b5aSreyk 	size_t			 len, nsm;
66959355b5aSreyk 
67059355b5aSreyk 	nsm = MAXCAPTURES;
67159355b5aSreyk 	memset(&ms, 0, sizeof(ms));
67259355b5aSreyk 	memset(sm, 0, sizeof(sm));
67359355b5aSreyk 	memset(m, 0, sizeof(*m));
67459355b5aSreyk 
67559355b5aSreyk 	ret = str_find_aux(&ms, pattern, string, sm, nsm, 0);
6766c2b6544Sreyk 	if (ret <= 0 || ms.error != NULL) {
67759355b5aSreyk 		/* Return -1 on error and store the error string */
67859355b5aSreyk 		*errstr = ms.error;
67959355b5aSreyk 		return (-1);
68059355b5aSreyk 	}
68159355b5aSreyk 
68259355b5aSreyk 	if ((m->sm_match = calloc(ret, sizeof(char *))) == NULL) {
68359355b5aSreyk 		*errstr = strerror(errno);
68459355b5aSreyk 		return (-1);
68559355b5aSreyk 	}
68659355b5aSreyk 	m->sm_nmatch = ret;
68759355b5aSreyk 
68859355b5aSreyk 	for (i = 0; i < ret; i++) {
68959355b5aSreyk 		if (sm[i].sm_so > sm[i].sm_eo)
69059355b5aSreyk 			continue;
69159355b5aSreyk 		len = sm[i].sm_eo - sm[i].sm_so;
69259355b5aSreyk 		if ((m->sm_match[i] = strndup(string +
69359355b5aSreyk 		    sm[i].sm_so, len)) == NULL) {
69459355b5aSreyk 			*errstr = strerror(errno);
69559355b5aSreyk 			str_match_free(m);
69659355b5aSreyk 			return (-1);
69759355b5aSreyk 		}
69859355b5aSreyk 	}
69959355b5aSreyk 
70059355b5aSreyk 	*errstr = NULL;
70159355b5aSreyk 	return (0);
70259355b5aSreyk }
70359355b5aSreyk 
70459355b5aSreyk void
str_match_free(struct str_match * m)70559355b5aSreyk str_match_free(struct str_match *m)
70659355b5aSreyk {
70759355b5aSreyk 	unsigned int	 i = 0;
70859355b5aSreyk 	for (i = 0; i < m->sm_nmatch; i++)
70959355b5aSreyk 		free(m->sm_match[i]);
71059355b5aSreyk 	free(m->sm_match);
711*3fbbbb84Ssemarie 	m->sm_match = NULL;
71259355b5aSreyk 	m->sm_nmatch = 0;
71359355b5aSreyk }
714