1*3fbbbb84Ssemarie /* $OpenBSD: patterns.c,v 1.5 2016/02/14 18:20:59 semarie Exp $ */
259355b5aSreyk
359355b5aSreyk /*
459355b5aSreyk * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
559355b5aSreyk * Copyright (C) 1994-2015 Lua.org, PUC-Rio.
659355b5aSreyk *
759355b5aSreyk * Permission is hereby granted, free of charge, to any person obtaining
859355b5aSreyk * a copy of this software and associated documentation files (the
959355b5aSreyk * "Software"), to deal in the Software without restriction, including
1059355b5aSreyk * without limitation the rights to use, copy, modify, merge, publish,
1159355b5aSreyk * distribute, sublicense, and/or sell copies of the Software, and to
1259355b5aSreyk * permit persons to whom the Software is furnished to do so, subject to
1359355b5aSreyk * the following conditions:
1459355b5aSreyk *
1559355b5aSreyk * The above copyright notice and this permission notice shall be
1659355b5aSreyk * included in all copies or substantial portions of the Software.
1759355b5aSreyk *
1859355b5aSreyk * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1959355b5aSreyk * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
2059355b5aSreyk * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
2159355b5aSreyk * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
2259355b5aSreyk * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
2359355b5aSreyk * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
2459355b5aSreyk * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2559355b5aSreyk */
2659355b5aSreyk
2759355b5aSreyk /*
2859355b5aSreyk * Derived from Lua 5.3.1:
29*3fbbbb84Ssemarie * $Id: patterns.c,v 1.5 2016/02/14 18:20:59 semarie Exp $
3059355b5aSreyk * Standard library for string operations and pattern-matching
3159355b5aSreyk */
3259355b5aSreyk
3325d98250Ssemarie #include <sys/types.h>
3425d98250Ssemarie #include <ctype.h>
3525d98250Ssemarie #include <errno.h>
3659355b5aSreyk #include <stddef.h>
3759355b5aSreyk #include <stdlib.h>
3859355b5aSreyk #include <string.h>
3959355b5aSreyk
4059355b5aSreyk #include "patterns.h"
4159355b5aSreyk
4259355b5aSreyk #define uchar(c) ((unsigned char)(c)) /* macro to 'unsign' a char */
4359355b5aSreyk #define CAP_UNFINISHED (-1)
4459355b5aSreyk #define CAP_POSITION (-2)
4559355b5aSreyk #define L_ESC '%'
4659355b5aSreyk #define SPECIALS "^$*+?.([%-"
4759355b5aSreyk
4859355b5aSreyk struct match_state {
4959355b5aSreyk int matchdepth; /* control for recursive depth (to avoid C
5059355b5aSreyk * stack overflow) */
5159355b5aSreyk int repetitioncounter; /* control the repetition items */
5259355b5aSreyk int maxcaptures; /* configured capture limit */
5359355b5aSreyk const char *src_init; /* init of source string */
5459355b5aSreyk const char *src_end; /* end ('\0') of source string */
5559355b5aSreyk const char *p_end; /* end ('\0') of pattern */
5659355b5aSreyk const char *error; /* should be NULL */
5759355b5aSreyk int level; /* total number of captures (finished or
5859355b5aSreyk * unfinished) */
5959355b5aSreyk struct {
6059355b5aSreyk const char *init;
6159355b5aSreyk ptrdiff_t len;
6259355b5aSreyk } capture[MAXCAPTURES];
6359355b5aSreyk };
6459355b5aSreyk
6559355b5aSreyk /* recursive function */
6659355b5aSreyk static const char *match(struct match_state *, const char *, const char *);
6759355b5aSreyk
6859355b5aSreyk static int
match_error(struct match_state * ms,const char * error)6959355b5aSreyk match_error(struct match_state *ms, const char *error)
7059355b5aSreyk {
7159355b5aSreyk ms->error = ms->error == NULL ? error : ms->error;
7259355b5aSreyk return (-1);
7359355b5aSreyk }
7459355b5aSreyk
7559355b5aSreyk static int
check_capture(struct match_state * ms,int l)7659355b5aSreyk check_capture(struct match_state *ms, int l)
7759355b5aSreyk {
7859355b5aSreyk l -= '1';
7959355b5aSreyk if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
8059355b5aSreyk return match_error(ms, "invalid capture index");
8159355b5aSreyk return (l);
8259355b5aSreyk }
8359355b5aSreyk
8459355b5aSreyk static int
capture_to_close(struct match_state * ms)8559355b5aSreyk capture_to_close(struct match_state *ms)
8659355b5aSreyk {
8759355b5aSreyk int level = ms->level;
8859355b5aSreyk for (level--; level >= 0; level--)
8959355b5aSreyk if (ms->capture[level].len == CAP_UNFINISHED)
9059355b5aSreyk return (level);
9159355b5aSreyk return match_error(ms, "invalid pattern capture");
9259355b5aSreyk }
9359355b5aSreyk
9459355b5aSreyk static const char *
classend(struct match_state * ms,const char * p)9559355b5aSreyk classend(struct match_state *ms, const char *p)
9659355b5aSreyk {
9759355b5aSreyk switch (*p++) {
9859355b5aSreyk case L_ESC:
9959355b5aSreyk if (p == ms->p_end)
10059355b5aSreyk match_error(ms,
10125d98250Ssemarie "malformed pattern (ends with '%')");
10259355b5aSreyk return p + 1;
10359355b5aSreyk case '[':
10459355b5aSreyk if (*p == '^')
10559355b5aSreyk p++;
10659355b5aSreyk do {
10759355b5aSreyk /* look for a ']' */
10859355b5aSreyk if (p == ms->p_end) {
10959355b5aSreyk match_error(ms,
11059355b5aSreyk "malformed pattern (missing ']')");
11159355b5aSreyk break;
11259355b5aSreyk }
11359355b5aSreyk if (*(p++) == L_ESC && p < ms->p_end) {
11459355b5aSreyk /* skip escapes (e.g. '%]') */
11559355b5aSreyk p++;
11659355b5aSreyk }
11759355b5aSreyk } while (*p != ']');
11859355b5aSreyk return p + 1;
11959355b5aSreyk default:
12059355b5aSreyk return p;
12159355b5aSreyk }
12259355b5aSreyk }
12359355b5aSreyk
12459355b5aSreyk static int
match_class(int c,int cl)12559355b5aSreyk match_class(int c, int cl)
12659355b5aSreyk {
12759355b5aSreyk int res;
12859355b5aSreyk switch (tolower(cl)) {
12959355b5aSreyk case 'a':
13059355b5aSreyk res = isalpha(c);
13159355b5aSreyk break;
13259355b5aSreyk case 'c':
13359355b5aSreyk res = iscntrl(c);
13459355b5aSreyk break;
13559355b5aSreyk case 'd':
13659355b5aSreyk res = isdigit(c);
13759355b5aSreyk break;
13859355b5aSreyk case 'g':
13959355b5aSreyk res = isgraph(c);
14059355b5aSreyk break;
14159355b5aSreyk case 'l':
14259355b5aSreyk res = islower(c);
14359355b5aSreyk break;
14459355b5aSreyk case 'p':
14559355b5aSreyk res = ispunct(c);
14659355b5aSreyk break;
14759355b5aSreyk case 's':
14859355b5aSreyk res = isspace(c);
14959355b5aSreyk break;
15059355b5aSreyk case 'u':
15159355b5aSreyk res = isupper(c);
15259355b5aSreyk break;
15359355b5aSreyk case 'w':
15459355b5aSreyk res = isalnum(c);
15559355b5aSreyk break;
15659355b5aSreyk case 'x':
15759355b5aSreyk res = isxdigit(c);
15859355b5aSreyk break;
15959355b5aSreyk default:
16059355b5aSreyk return (cl == c);
16159355b5aSreyk }
16259355b5aSreyk return (islower(cl) ? res : !res);
16359355b5aSreyk }
16459355b5aSreyk
16559355b5aSreyk static int
matchbracketclass(int c,const char * p,const char * ec)16659355b5aSreyk matchbracketclass(int c, const char *p, const char *ec)
16759355b5aSreyk {
16859355b5aSreyk int sig = 1;
16959355b5aSreyk if (*(p + 1) == '^') {
17059355b5aSreyk sig = 0;
17159355b5aSreyk /* skip the '^' */
17259355b5aSreyk p++;
17359355b5aSreyk }
17459355b5aSreyk while (++p < ec) {
17559355b5aSreyk if (*p == L_ESC) {
17659355b5aSreyk p++;
17759355b5aSreyk if (match_class(c, uchar(*p)))
17859355b5aSreyk return sig;
17959355b5aSreyk } else if ((*(p + 1) == '-') && (p + 2 < ec)) {
18059355b5aSreyk p += 2;
18159355b5aSreyk if (uchar(*(p - 2)) <= c && c <= uchar(*p))
18259355b5aSreyk return sig;
18359355b5aSreyk } else if (uchar(*p) == c)
18459355b5aSreyk return sig;
18559355b5aSreyk }
18659355b5aSreyk return !sig;
18759355b5aSreyk }
18859355b5aSreyk
18959355b5aSreyk static int
singlematch(struct match_state * ms,const char * s,const char * p,const char * ep)19059355b5aSreyk singlematch(struct match_state *ms, const char *s, const char *p,
19159355b5aSreyk const char *ep)
19259355b5aSreyk {
19359355b5aSreyk if (s >= ms->src_end)
19459355b5aSreyk return 0;
19559355b5aSreyk else {
19659355b5aSreyk int c = uchar(*s);
19759355b5aSreyk switch (*p) {
19859355b5aSreyk case '.':
19959355b5aSreyk /* matches any char */
20059355b5aSreyk return (1);
20159355b5aSreyk case L_ESC:
20259355b5aSreyk return match_class(c, uchar(*(p + 1)));
20359355b5aSreyk case '[':
20459355b5aSreyk return matchbracketclass(c, p, ep - 1);
20559355b5aSreyk default:
20659355b5aSreyk return (uchar(*p) == c);
20759355b5aSreyk }
20859355b5aSreyk }
20959355b5aSreyk }
21059355b5aSreyk
21159355b5aSreyk static const char *
matchbalance(struct match_state * ms,const char * s,const char * p)21259355b5aSreyk matchbalance(struct match_state *ms, const char *s, const char *p)
21359355b5aSreyk {
21459355b5aSreyk if (p >= ms->p_end - 1) {
21559355b5aSreyk match_error(ms,
21659355b5aSreyk "malformed pattern (missing arguments to '%b')");
21759355b5aSreyk return (NULL);
21859355b5aSreyk }
21959355b5aSreyk if (*s != *p)
22059355b5aSreyk return (NULL);
22159355b5aSreyk else {
22259355b5aSreyk int b = *p;
22359355b5aSreyk int e = *(p + 1);
22459355b5aSreyk int cont = 1;
22559355b5aSreyk while (++s < ms->src_end) {
22659355b5aSreyk if (*s == e) {
22759355b5aSreyk if (--cont == 0)
22859355b5aSreyk return s + 1;
22959355b5aSreyk } else if (*s == b)
23059355b5aSreyk cont++;
23159355b5aSreyk }
23259355b5aSreyk }
23359355b5aSreyk
23459355b5aSreyk /* string ends out of balance */
23559355b5aSreyk return (NULL);
23659355b5aSreyk }
23759355b5aSreyk
23859355b5aSreyk static const char *
max_expand(struct match_state * ms,const char * s,const char * p,const char * ep)23959355b5aSreyk max_expand(struct match_state *ms, const char *s, const char *p, const char *ep)
24059355b5aSreyk {
24159355b5aSreyk ptrdiff_t i = 0;
24259355b5aSreyk /* counts maximum expand for item */
24359355b5aSreyk while (singlematch(ms, s + i, p, ep))
24459355b5aSreyk i++;
24559355b5aSreyk /* keeps trying to match with the maximum repetitions */
24659355b5aSreyk while (i >= 0) {
24759355b5aSreyk const char *res = match(ms, (s + i), ep + 1);
24859355b5aSreyk if (res)
24959355b5aSreyk return res;
25059355b5aSreyk /* else didn't match; reduce 1 repetition to try again */
25159355b5aSreyk i--;
25259355b5aSreyk }
25359355b5aSreyk return NULL;
25459355b5aSreyk }
25559355b5aSreyk
25659355b5aSreyk static const char *
min_expand(struct match_state * ms,const char * s,const char * p,const char * ep)25759355b5aSreyk min_expand(struct match_state *ms, const char *s, const char *p, const char *ep)
25859355b5aSreyk {
25959355b5aSreyk for (;;) {
26059355b5aSreyk const char *res = match(ms, s, ep + 1);
26159355b5aSreyk if (res != NULL)
26259355b5aSreyk return res;
26359355b5aSreyk else if (singlematch(ms, s, p, ep))
26459355b5aSreyk s++; /* try with one more repetition */
26559355b5aSreyk else
26659355b5aSreyk return NULL;
26759355b5aSreyk }
26859355b5aSreyk }
26959355b5aSreyk
27059355b5aSreyk static const char *
start_capture(struct match_state * ms,const char * s,const char * p,int what)27159355b5aSreyk start_capture(struct match_state *ms, const char *s, const char *p, int what)
27259355b5aSreyk {
27359355b5aSreyk const char *res;
27459355b5aSreyk
27559355b5aSreyk int level = ms->level;
27659355b5aSreyk if (level >= ms->maxcaptures) {
27759355b5aSreyk match_error(ms, "too many captures");
27859355b5aSreyk return (NULL);
27959355b5aSreyk }
28059355b5aSreyk ms->capture[level].init = s;
28159355b5aSreyk ms->capture[level].len = what;
28259355b5aSreyk ms->level = level + 1;
28359355b5aSreyk /* undo capture if match failed */
28459355b5aSreyk if ((res = match(ms, s, p)) == NULL)
28559355b5aSreyk ms->level--;
28659355b5aSreyk return res;
28759355b5aSreyk }
28859355b5aSreyk
28959355b5aSreyk static const char *
end_capture(struct match_state * ms,const char * s,const char * p)29059355b5aSreyk end_capture(struct match_state *ms, const char *s, const char *p)
29159355b5aSreyk {
29259355b5aSreyk int l = capture_to_close(ms);
29359355b5aSreyk const char *res;
29459355b5aSreyk if (l == -1)
29559355b5aSreyk return NULL;
29659355b5aSreyk /* close capture */
29759355b5aSreyk ms->capture[l].len = s - ms->capture[l].init;
29859355b5aSreyk /* undo capture if match failed */
29959355b5aSreyk if ((res = match(ms, s, p)) == NULL)
30059355b5aSreyk ms->capture[l].len = CAP_UNFINISHED;
30159355b5aSreyk return res;
30259355b5aSreyk }
30359355b5aSreyk
30459355b5aSreyk static const char *
match_capture(struct match_state * ms,const char * s,int l)30559355b5aSreyk match_capture(struct match_state *ms, const char *s, int l)
30659355b5aSreyk {
30759355b5aSreyk size_t len;
30859355b5aSreyk l = check_capture(ms, l);
30959355b5aSreyk if (l == -1)
31059355b5aSreyk return NULL;
31159355b5aSreyk len = ms->capture[l].len;
31259355b5aSreyk if ((size_t) (ms->src_end - s) >= len &&
31359355b5aSreyk memcmp(ms->capture[l].init, s, len) == 0)
31459355b5aSreyk return s + len;
31559355b5aSreyk else
31659355b5aSreyk return NULL;
31759355b5aSreyk }
31859355b5aSreyk
31959355b5aSreyk static const char *
match(struct match_state * ms,const char * s,const char * p)32059355b5aSreyk match(struct match_state *ms, const char *s, const char *p)
32159355b5aSreyk {
32259355b5aSreyk const char *ep, *res;
32359355b5aSreyk char previous;
32459355b5aSreyk
32559355b5aSreyk if (ms->matchdepth-- == 0) {
32659355b5aSreyk match_error(ms, "pattern too complex");
32759355b5aSreyk return (NULL);
32859355b5aSreyk }
32959355b5aSreyk
33059355b5aSreyk /* using goto's to optimize tail recursion */
33159355b5aSreyk init:
33259355b5aSreyk /* end of pattern? */
33359355b5aSreyk if (p != ms->p_end) {
33459355b5aSreyk switch (*p) {
33559355b5aSreyk case '(':
33659355b5aSreyk /* start capture */
33759355b5aSreyk if (*(p + 1) == ')')
33859355b5aSreyk /* position capture? */
33959355b5aSreyk s = start_capture(ms, s, p + 2, CAP_POSITION);
34059355b5aSreyk else
34159355b5aSreyk s = start_capture(ms, s, p + 1, CAP_UNFINISHED);
34259355b5aSreyk break;
34359355b5aSreyk case ')':
34459355b5aSreyk /* end capture */
34559355b5aSreyk s = end_capture(ms, s, p + 1);
34659355b5aSreyk break;
34759355b5aSreyk case '$':
34859355b5aSreyk /* is the '$' the last char in pattern? */
34959355b5aSreyk if ((p + 1) != ms->p_end) {
35059355b5aSreyk /* no; go to default */
35159355b5aSreyk goto dflt;
35259355b5aSreyk }
35359355b5aSreyk /* check end of string */
35459355b5aSreyk s = (s == ms->src_end) ? s : NULL;
35559355b5aSreyk break;
35659355b5aSreyk case L_ESC:
35759355b5aSreyk /* escaped sequences not in the format class[*+?-]? */
35859355b5aSreyk switch (*(p + 1)) {
35959355b5aSreyk case 'b':
36059355b5aSreyk /* balanced string? */
36159355b5aSreyk s = matchbalance(ms, s, p + 2);
36259355b5aSreyk if (s != NULL) {
36359355b5aSreyk p += 4;
36459355b5aSreyk /* return match(ms, s, p + 4); */
36559355b5aSreyk goto init;
36659355b5aSreyk } /* else fail (s == NULL) */
36759355b5aSreyk break;
36859355b5aSreyk case 'f':
36959355b5aSreyk /* frontier? */
37059355b5aSreyk p += 2;
37159355b5aSreyk if (*p != '[') {
37259355b5aSreyk match_error(ms, "missing '['"
37359355b5aSreyk " after '%f' in pattern");
37459355b5aSreyk break;
37559355b5aSreyk }
37659355b5aSreyk /* points to what is next */
37759355b5aSreyk ep = classend(ms, p);
37859355b5aSreyk if (ms->error != NULL)
37959355b5aSreyk break;
38059355b5aSreyk previous =
38159355b5aSreyk (s == ms->src_init) ? '\0' : *(s - 1);
38259355b5aSreyk if (!matchbracketclass(uchar(previous),
38359355b5aSreyk p, ep - 1) &&
38459355b5aSreyk matchbracketclass(uchar(*s),
38559355b5aSreyk p, ep - 1)) {
38659355b5aSreyk p = ep;
38759355b5aSreyk /* return match(ms, s, ep); */
38859355b5aSreyk goto init;
38959355b5aSreyk }
39059355b5aSreyk /* match failed */
39159355b5aSreyk s = NULL;
39259355b5aSreyk break;
39359355b5aSreyk case '0':
39459355b5aSreyk case '1':
39559355b5aSreyk case '2':
39659355b5aSreyk case '3':
39759355b5aSreyk case '4':
39859355b5aSreyk case '5':
39959355b5aSreyk case '6':
40059355b5aSreyk case '7':
40159355b5aSreyk case '8':
40259355b5aSreyk case '9':
40359355b5aSreyk /* capture results (%0-%9)? */
40459355b5aSreyk s = match_capture(ms, s, uchar(*(p + 1)));
40559355b5aSreyk if (s != NULL) {
40659355b5aSreyk p += 2;
40759355b5aSreyk /* return match(ms, s, p + 2) */
40859355b5aSreyk goto init;
40959355b5aSreyk }
41059355b5aSreyk break;
41159355b5aSreyk default:
41259355b5aSreyk goto dflt;
41359355b5aSreyk }
41459355b5aSreyk break;
41559355b5aSreyk default:
41659355b5aSreyk
41759355b5aSreyk /* pattern class plus optional suffix */
41859355b5aSreyk dflt:
41959355b5aSreyk /* points to optional suffix */
42059355b5aSreyk ep = classend(ms, p);
42159355b5aSreyk if (ms->error != NULL)
42259355b5aSreyk break;
42359355b5aSreyk
42459355b5aSreyk /* does not match at least once? */
42559355b5aSreyk if (!singlematch(ms, s, p, ep)) {
42659355b5aSreyk if (ms->repetitioncounter-- == 0) {
42759355b5aSreyk match_error(ms, "max repetition items");
42859355b5aSreyk s = NULL; /* fail */
42959355b5aSreyk /* accept empty? */
43059355b5aSreyk } else if
43159355b5aSreyk (*ep == '*' || *ep == '?' || *ep == '-') {
43259355b5aSreyk p = ep + 1;
43359355b5aSreyk /* return match(ms, s, ep + 1); */
43459355b5aSreyk goto init;
43559355b5aSreyk } else {
43659355b5aSreyk /* '+' or no suffix */
43759355b5aSreyk s = NULL; /* fail */
43859355b5aSreyk }
43959355b5aSreyk } else {
44059355b5aSreyk /* matched once */
44159355b5aSreyk /* handle optional suffix */
44259355b5aSreyk switch (*ep) {
44359355b5aSreyk case '?':
44459355b5aSreyk /* optional */
44559355b5aSreyk if ((res =
44659355b5aSreyk match(ms, s + 1, ep + 1)) != NULL)
44759355b5aSreyk s = res;
44859355b5aSreyk else {
44959355b5aSreyk /*
45059355b5aSreyk * else return
45159355b5aSreyk * match(ms, s, ep + 1);
45259355b5aSreyk */
45359355b5aSreyk p = ep + 1;
45459355b5aSreyk goto init;
45559355b5aSreyk }
45659355b5aSreyk break;
45759355b5aSreyk case '+':
45859355b5aSreyk /* 1 or more repetitions */
45959355b5aSreyk s++; /* 1 match already done */
46059355b5aSreyk /* FALLTHROUGH */
46159355b5aSreyk case '*':
46259355b5aSreyk /* 0 or more repetitions */
46359355b5aSreyk s = max_expand(ms, s, p, ep);
46459355b5aSreyk break;
46559355b5aSreyk case '-':
46659355b5aSreyk /* 0 or more repetitions (minimum) */
46759355b5aSreyk s = min_expand(ms, s, p, ep);
46859355b5aSreyk break;
46959355b5aSreyk default:
47059355b5aSreyk /* no suffix */
47159355b5aSreyk s++;
47259355b5aSreyk p = ep;
47359355b5aSreyk /* return match(ms, s + 1, ep); */
47459355b5aSreyk goto init;
47559355b5aSreyk }
47659355b5aSreyk }
47759355b5aSreyk break;
47859355b5aSreyk }
47959355b5aSreyk }
48059355b5aSreyk ms->matchdepth++;
48159355b5aSreyk return s;
48259355b5aSreyk }
48359355b5aSreyk
48459355b5aSreyk static const char *
lmemfind(const char * s1,size_t l1,const char * s2,size_t l2)48559355b5aSreyk lmemfind(const char *s1, size_t l1,
48659355b5aSreyk const char *s2, size_t l2)
48759355b5aSreyk {
48859355b5aSreyk const char *init;
48959355b5aSreyk
49059355b5aSreyk if (l2 == 0) {
49159355b5aSreyk /* empty strings are everywhere */
49259355b5aSreyk return (s1);
49359355b5aSreyk } else if (l2 > l1) {
49459355b5aSreyk /* avoids a negative 'l1' */
49559355b5aSreyk return (NULL);
49659355b5aSreyk } else {
49759355b5aSreyk /*
49859355b5aSreyk * to search for a '*s2' inside 's1'
49959355b5aSreyk * - 1st char will be checked by 'memchr'
50059355b5aSreyk * - 's2' cannot be found after that
50159355b5aSreyk */
50259355b5aSreyk l2--;
50359355b5aSreyk l1 = l1 - l2;
50459355b5aSreyk while (l1 > 0 &&
50559355b5aSreyk (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
50659355b5aSreyk /* 1st char is already checked */
50759355b5aSreyk init++;
50859355b5aSreyk if (memcmp(init, s2 + 1, l2) == 0)
50959355b5aSreyk return init - 1;
51059355b5aSreyk else {
51159355b5aSreyk /* correct 'l1' and 's1' to try again */
51259355b5aSreyk l1 -= init - s1;
51359355b5aSreyk s1 = init;
51459355b5aSreyk }
51559355b5aSreyk }
51659355b5aSreyk /* not found */
51759355b5aSreyk return (NULL);
51859355b5aSreyk }
51959355b5aSreyk }
52059355b5aSreyk
52159355b5aSreyk static int
push_onecapture(struct match_state * ms,int i,const char * s,const char * e,struct str_find * sm)52259355b5aSreyk push_onecapture(struct match_state *ms, int i, const char *s,
52359355b5aSreyk const char *e, struct str_find *sm)
52459355b5aSreyk {
52559355b5aSreyk if (i >= ms->level) {
52659355b5aSreyk if (i == 0 || ms->level == 0) {
52759355b5aSreyk /* add whole match */
52859355b5aSreyk sm->sm_so = (off_t)(s - ms->src_init);
52959355b5aSreyk sm->sm_eo = (off_t)(e - s) + sm->sm_so;
53059355b5aSreyk } else
53159355b5aSreyk return match_error(ms, "invalid capture index");
53259355b5aSreyk } else {
53359355b5aSreyk ptrdiff_t l = ms->capture[i].len;
53459355b5aSreyk if (l == CAP_UNFINISHED)
53559355b5aSreyk return match_error(ms, "unfinished capture");
53659355b5aSreyk sm->sm_so = ms->capture[i].init - ms->src_init;
53759355b5aSreyk sm->sm_eo = sm->sm_so + l;
53859355b5aSreyk }
53959355b5aSreyk sm->sm_eo = sm->sm_eo < sm->sm_so ? sm->sm_so : sm->sm_eo;
54059355b5aSreyk return (0);
54159355b5aSreyk }
54259355b5aSreyk
54359355b5aSreyk static int
push_captures(struct match_state * ms,const char * s,const char * e,struct str_find * sm,size_t nsm)54459355b5aSreyk push_captures(struct match_state *ms, const char *s, const char *e,
54559355b5aSreyk struct str_find *sm, size_t nsm)
54659355b5aSreyk {
54759355b5aSreyk unsigned int i;
54859355b5aSreyk unsigned int nlevels = (ms->level <= 0 && s) ? 1 : ms->level;
54959355b5aSreyk
55059355b5aSreyk if (nlevels > nsm)
55159355b5aSreyk nlevels = nsm;
55259355b5aSreyk for (i = 0; i < nlevels; i++)
55359355b5aSreyk if (push_onecapture(ms, i, s, e, sm + i) == -1)
55459355b5aSreyk break;
55559355b5aSreyk
55659355b5aSreyk /* number of strings pushed */
55759355b5aSreyk return (nlevels);
55859355b5aSreyk }
55959355b5aSreyk
56059355b5aSreyk /* check whether pattern has no special characters */
56159355b5aSreyk static int
nospecials(const char * p,size_t l)56259355b5aSreyk nospecials(const char *p, size_t l)
56359355b5aSreyk {
56459355b5aSreyk size_t upto = 0;
56559355b5aSreyk
56659355b5aSreyk do {
56759355b5aSreyk if (strpbrk(p + upto, SPECIALS)) {
56859355b5aSreyk /* pattern has a special character */
56959355b5aSreyk return 0;
57059355b5aSreyk }
57159355b5aSreyk /* may have more after \0 */
57259355b5aSreyk upto += strlen(p + upto) + 1;
57359355b5aSreyk } while (upto <= l);
57459355b5aSreyk
57559355b5aSreyk /* no special chars found */
57659355b5aSreyk return (1);
57759355b5aSreyk }
57859355b5aSreyk
57959355b5aSreyk static int
str_find_aux(struct match_state * ms,const char * pattern,const char * string,struct str_find * sm,size_t nsm,off_t init)58059355b5aSreyk str_find_aux(struct match_state *ms, const char *pattern, const char *string,
58159355b5aSreyk struct str_find *sm, size_t nsm, off_t init)
58259355b5aSreyk {
58359355b5aSreyk size_t ls = strlen(string);
58459355b5aSreyk size_t lp = strlen(pattern);
58559355b5aSreyk const char *s = string;
58659355b5aSreyk const char *p = pattern;
58759355b5aSreyk const char *s1, *s2;
58859355b5aSreyk int anchor, i;
58959355b5aSreyk
59059355b5aSreyk if (init < 0)
59159355b5aSreyk init = 0;
59259355b5aSreyk else if (init > (off_t)ls)
59359355b5aSreyk return match_error(ms, "starting after string's end");
59459355b5aSreyk s1 = s + init;
59559355b5aSreyk
59659355b5aSreyk if (nospecials(p, lp)) {
59759355b5aSreyk /* do a plain search */
59859355b5aSreyk s2 = lmemfind(s1, ls - (size_t)init, p, lp);
59959355b5aSreyk if (s2 != NULL) {
60059355b5aSreyk i = 0;
60159355b5aSreyk sm[i].sm_so = 0;
60259355b5aSreyk sm[i].sm_eo = ls;
60359355b5aSreyk if (nsm > 1) {
60459355b5aSreyk i++;
60559355b5aSreyk sm[i].sm_so = s2 - s;
60659355b5aSreyk sm[i].sm_eo = (s2 - s) + lp;
60759355b5aSreyk }
60859355b5aSreyk return (i + 1);
60959355b5aSreyk }
61059355b5aSreyk return (0);
61159355b5aSreyk }
61259355b5aSreyk
61359355b5aSreyk anchor = (*p == '^');
61459355b5aSreyk if (anchor) {
61559355b5aSreyk p++;
61659355b5aSreyk lp--; /* skip anchor character */
61759355b5aSreyk }
61859355b5aSreyk ms->maxcaptures = (nsm > MAXCAPTURES ? MAXCAPTURES : nsm) - 1;
61959355b5aSreyk ms->matchdepth = MAXCCALLS;
62059355b5aSreyk ms->repetitioncounter = MAXREPETITION;
62159355b5aSreyk ms->src_init = s;
62259355b5aSreyk ms->src_end = s + ls;
62359355b5aSreyk ms->p_end = p + lp;
62459355b5aSreyk do {
62559355b5aSreyk const char *res;
62659355b5aSreyk ms->level = 0;
62759355b5aSreyk if ((res = match(ms, s1, p)) != NULL) {
62859355b5aSreyk sm->sm_so = 0;
62959355b5aSreyk sm->sm_eo = ls;
63059355b5aSreyk return push_captures(ms, s1, res, sm + 1, nsm - 1) + 1;
63159355b5aSreyk
63259355b5aSreyk } else if (ms->error != NULL) {
63359355b5aSreyk return 0;
63459355b5aSreyk }
63559355b5aSreyk } while (s1++ < ms->src_end && !anchor);
63659355b5aSreyk
63759355b5aSreyk return 0;
63859355b5aSreyk }
63959355b5aSreyk
64059355b5aSreyk int
str_find(const char * string,const char * pattern,struct str_find * sm,size_t nsm,const char ** errstr)64159355b5aSreyk str_find(const char *string, const char *pattern, struct str_find *sm,
64259355b5aSreyk size_t nsm, const char **errstr)
64359355b5aSreyk {
64459355b5aSreyk struct match_state ms;
64559355b5aSreyk int ret;
64659355b5aSreyk
64759355b5aSreyk memset(&ms, 0, sizeof(ms));
64859355b5aSreyk memset(sm, 0, nsm * sizeof(*sm));
64959355b5aSreyk
65059355b5aSreyk ret = str_find_aux(&ms, pattern, string, sm, nsm, 0);
65159355b5aSreyk if (ms.error != NULL) {
65259355b5aSreyk /* Return 0 on error and store the error string */
65359355b5aSreyk *errstr = ms.error;
65459355b5aSreyk ret = 0;
65559355b5aSreyk } else
65659355b5aSreyk *errstr = NULL;
65759355b5aSreyk
65859355b5aSreyk return (ret);
65959355b5aSreyk }
66059355b5aSreyk
66159355b5aSreyk int
str_match(const char * string,const char * pattern,struct str_match * m,const char ** errstr)66259355b5aSreyk str_match(const char *string, const char *pattern, struct str_match *m,
66359355b5aSreyk const char **errstr)
66459355b5aSreyk {
66559355b5aSreyk struct str_find sm[MAXCAPTURES];
66659355b5aSreyk struct match_state ms;
66759355b5aSreyk int ret, i;
66859355b5aSreyk size_t len, nsm;
66959355b5aSreyk
67059355b5aSreyk nsm = MAXCAPTURES;
67159355b5aSreyk memset(&ms, 0, sizeof(ms));
67259355b5aSreyk memset(sm, 0, sizeof(sm));
67359355b5aSreyk memset(m, 0, sizeof(*m));
67459355b5aSreyk
67559355b5aSreyk ret = str_find_aux(&ms, pattern, string, sm, nsm, 0);
6766c2b6544Sreyk if (ret <= 0 || ms.error != NULL) {
67759355b5aSreyk /* Return -1 on error and store the error string */
67859355b5aSreyk *errstr = ms.error;
67959355b5aSreyk return (-1);
68059355b5aSreyk }
68159355b5aSreyk
68259355b5aSreyk if ((m->sm_match = calloc(ret, sizeof(char *))) == NULL) {
68359355b5aSreyk *errstr = strerror(errno);
68459355b5aSreyk return (-1);
68559355b5aSreyk }
68659355b5aSreyk m->sm_nmatch = ret;
68759355b5aSreyk
68859355b5aSreyk for (i = 0; i < ret; i++) {
68959355b5aSreyk if (sm[i].sm_so > sm[i].sm_eo)
69059355b5aSreyk continue;
69159355b5aSreyk len = sm[i].sm_eo - sm[i].sm_so;
69259355b5aSreyk if ((m->sm_match[i] = strndup(string +
69359355b5aSreyk sm[i].sm_so, len)) == NULL) {
69459355b5aSreyk *errstr = strerror(errno);
69559355b5aSreyk str_match_free(m);
69659355b5aSreyk return (-1);
69759355b5aSreyk }
69859355b5aSreyk }
69959355b5aSreyk
70059355b5aSreyk *errstr = NULL;
70159355b5aSreyk return (0);
70259355b5aSreyk }
70359355b5aSreyk
70459355b5aSreyk void
str_match_free(struct str_match * m)70559355b5aSreyk str_match_free(struct str_match *m)
70659355b5aSreyk {
70759355b5aSreyk unsigned int i = 0;
70859355b5aSreyk for (i = 0; i < m->sm_nmatch; i++)
70959355b5aSreyk free(m->sm_match[i]);
71059355b5aSreyk free(m->sm_match);
711*3fbbbb84Ssemarie m->sm_match = NULL;
71259355b5aSreyk m->sm_nmatch = 0;
71359355b5aSreyk }
714