xref: /csrg-svn/lib/libc/regex/engine.c (revision 55882)
155845Sbostic /*-
255845Sbostic  * Copyright (c) 1992 Henry Spencer.
355845Sbostic  * Copyright (c) 1992 The Regents of the University of California.
455845Sbostic  * All rights reserved.
555845Sbostic  *
655845Sbostic  * This code is derived from software contributed to Berkeley by
755845Sbostic  * Henry Spencer of the University of Toronto.
855845Sbostic  *
955845Sbostic  * %sccs.include.redist.c%
1055845Sbostic  *
11*55882Sbostic  *	@(#)engine.c	5.2 (Berkeley) 08/10/92
1255845Sbostic  */
1355845Sbostic 
1455845Sbostic /*
1555845Sbostic  * The matching engine and friends.  This file is #included by regexec.c
1655845Sbostic  * after suitable #defines of a variety of macros used herein, so that
1755845Sbostic  * different state representations can be used without duplicating masses
1855845Sbostic  * of code.
1955845Sbostic  */
2055845Sbostic 
2155845Sbostic #ifdef SNAMES
2255845Sbostic #define	matcher	smatcher
2355845Sbostic #define	fast	sfast
2455845Sbostic #define	slow	sslow
2555845Sbostic #define	dissect	sdissect
2655845Sbostic #define	backref	sbackref
2755845Sbostic #define	expand	sexpand
2855845Sbostic #define	step	sstep
2955845Sbostic #define	print	sprint
3055845Sbostic #define	match	smat
3155845Sbostic #endif
3255845Sbostic #ifdef LNAMES
3355845Sbostic #define	matcher	lmatcher
3455845Sbostic #define	fast	lfast
3555845Sbostic #define	slow	lslow
3655845Sbostic #define	dissect	ldissect
3755845Sbostic #define	backref	lbackref
3855845Sbostic #define	expand	lexpand
3955845Sbostic #define	step	lstep
4055845Sbostic #define	print	lprint
4155845Sbostic #define	match	lmat
4255845Sbostic #endif
4355845Sbostic 
4455845Sbostic /* another structure passed up and down to avoid zillions of parameters */
4555845Sbostic struct match {
4655845Sbostic 	struct re_guts *g;
4755845Sbostic 	int eflags;
4855845Sbostic 	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
4955845Sbostic 	uchar *offp;		/* offsets work from here */
5055845Sbostic 	uchar *beginp;		/* start of string -- virtual NUL precedes */
5155845Sbostic 	uchar *endp;		/* end of string -- virtual NUL here */
5255845Sbostic 	uchar *coldp;		/* can be no match starting before here */
5355845Sbostic 	uchar **lastpos;	/* [nplus+1] */
5455845Sbostic 	STATEVARS;
5555845Sbostic 	states st;		/* current states */
5655845Sbostic 	states fresh;		/* states for a fresh start */
5755845Sbostic 	states tmp;		/* temporary */
5855845Sbostic 	states empty;		/* empty set of states */
5955845Sbostic };
6055845Sbostic 
6155845Sbostic #ifndef NDEBUG
6255845Sbostic STATIC void print();
6355845Sbostic extern char *regchar();
6455845Sbostic #define	SP(t, s, c)	{ if (m->eflags&REG_TRACE) print(m->g, t, s, c, stdout); }
6555845Sbostic #define	DO(t, p1, p2, s1, s2)	{ if (m->eflags&REG_TRACE) { \
6655845Sbostic 					printf("%s %s-", t, regchar(*(p1))); \
6755845Sbostic 					printf("%s ", regchar(*(p2))); \
6855845Sbostic 					printf("%ld-%ld\n", s1, s2); } }
6955845Sbostic #else
7055845Sbostic #define	SP(t, s, c)	/* nothing */
7155845Sbostic #define	DO(t, p1, p2, s1, s2)	/* nothing */
7255845Sbostic #endif
7355845Sbostic 
7455845Sbostic STATIC uchar *fast();
7555845Sbostic STATIC uchar *slow();
7655845Sbostic STATIC uchar *dissect();
7755845Sbostic STATIC uchar *backref();
7855845Sbostic STATIC states expand();
7955845Sbostic STATIC states step();
8055845Sbostic 
8155845Sbostic /*
8255845Sbostic  - matcher - the actual matching engine
8355845Sbostic  */
8455845Sbostic static int			/* 0 success, REG_NOMATCH failure */
8555845Sbostic matcher(g, string, nmatch, pmatch, eflags)
8655845Sbostic register struct re_guts *g;
8755845Sbostic uchar *string;
8855845Sbostic size_t nmatch;
8955845Sbostic regmatch_t pmatch[];
9055845Sbostic int eflags;
9155845Sbostic {
9255845Sbostic 	register uchar *endp;
9355845Sbostic 	register int i;
9455845Sbostic 	struct match mv;
9555845Sbostic 	register struct match *m = &mv;
9655845Sbostic 	register uchar *dp;
9755845Sbostic 	const register sopno gf = g->firststate+1;	/* +1 for OEND */
9855845Sbostic 	const register sopno gl = g->laststate;
9955845Sbostic 	uchar *start;
10055845Sbostic 	uchar *stop;
10155845Sbostic 
10255845Sbostic 	if (g->cflags&REG_NOSUB)
10355845Sbostic 		nmatch = 0;		/* simplify tests */
10455845Sbostic 	if (eflags&REG_STARTEND) {
10555845Sbostic 		start = string + pmatch[0].rm_so;
10655845Sbostic 		stop = string + pmatch[0].rm_eo;
10755845Sbostic 	} else {
10855845Sbostic 		start = string;
10955845Sbostic 		stop = start + strlen((char *)start);
11055845Sbostic 	}
11155845Sbostic 
112*55882Sbostic 	/* prescreening; this does wonders for this rather slow code */
113*55882Sbostic 	if (g->must != NULL) {
114*55882Sbostic 		for (dp = start; dp < stop; dp++)
115*55882Sbostic 			if (*dp == (uchar)g->must[0] && stop - dp >= g->mlen &&
116*55882Sbostic 			strncmp((char *)dp, g->must, (size_t)g->mlen) == 0)
117*55882Sbostic 				break;
118*55882Sbostic 		if (dp == stop)		/* we didn't find g->must */
119*55882Sbostic 			return(REG_NOMATCH);
120*55882Sbostic 	}
121*55882Sbostic 
12255845Sbostic 	/* match struct setup */
12355845Sbostic 	m->g = g;
12455845Sbostic 	m->eflags = eflags;
12555845Sbostic 	m->pmatch = NULL;
12655845Sbostic 	m->lastpos = NULL;
12755845Sbostic 	m->offp = string;
12855845Sbostic 	m->beginp = start;
12955845Sbostic 	m->endp = stop;
13055845Sbostic 	STATESETUP(m, 4);
13155845Sbostic 	SETUP(m->st);
13255845Sbostic 	SETUP(m->fresh);
13355845Sbostic 	SETUP(m->tmp);
13455845Sbostic 	SETUP(m->empty);
13555845Sbostic 	CLEAR(m->empty);
13655845Sbostic 
13755845Sbostic 	/* this loop does only one repetition except for backrefs */
13855845Sbostic 	for (;;) {
13955845Sbostic 		endp = fast(m, start, stop, gf, gl);
14055845Sbostic 		if (endp == NULL) {		/* a miss */
14155845Sbostic 			STATETEARDOWN(m);
14255845Sbostic 			return(REG_NOMATCH);
14355845Sbostic 		}
14455845Sbostic 		if (nmatch == 0 && !g->backrefs)
14555845Sbostic 			break;		/* no further info needed */
14655845Sbostic 
14755845Sbostic 		/* where? */
14855845Sbostic 		assert(m->coldp != NULL);
14955845Sbostic 		for (;;) {
15055845Sbostic 			endp = slow(m, m->coldp, stop, gf, gl);
15155845Sbostic 			if (endp != NULL)
15255845Sbostic 				break;
15355845Sbostic 			assert(*m->coldp != '\0');
15455845Sbostic 			m->coldp++;
15555845Sbostic 		}
15655845Sbostic 		if (nmatch == 1 && !g->backrefs)
15755845Sbostic 			break;		/* no further info needed */
15855845Sbostic 
15955845Sbostic 		/* oh my, he wants the subexpressions... */
16055845Sbostic 		if (m->pmatch == NULL)
16155845Sbostic 			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
16255845Sbostic 							sizeof(regmatch_t));
16355845Sbostic 		if (m->pmatch == NULL) {
16455845Sbostic 			STATETEARDOWN(m);
16555845Sbostic 			return(REG_ESPACE);
16655845Sbostic 		}
16755845Sbostic 		for (i = 1; i <= m->g->nsub; i++) {
16855845Sbostic 			m->pmatch[i].rm_so = -1;
16955845Sbostic 			m->pmatch[i].rm_eo = -1;
17055845Sbostic 		}
17155845Sbostic 		if (!g->backrefs)
17255845Sbostic 			dp = dissect(m, m->coldp, endp, gf, gl);
17355845Sbostic 		else {
17455845Sbostic 			if (g->nplus > 0 && m->lastpos == NULL)
17555845Sbostic 				m->lastpos = (uchar **)malloc((g->nplus+1) *
17655845Sbostic 							sizeof(uchar *));
17755845Sbostic 			if (g->nplus > 0 && m->lastpos == NULL) {
17855845Sbostic 				free(m->pmatch);
17955845Sbostic 				STATETEARDOWN(m);
18055845Sbostic 				return(REG_ESPACE);
18155845Sbostic 			}
18255845Sbostic 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
18355845Sbostic 		}
18455845Sbostic 		if (dp != NULL)
18555845Sbostic 			break;
18655845Sbostic 
18755845Sbostic 		/* uh-oh... we couldn't find a subexpression-level match */
18855845Sbostic 		assert(g->backrefs);	/* must be back references doing it */
18955845Sbostic 		assert(g->nplus == 0 || m->lastpos != NULL);
19055845Sbostic 		while (dp == NULL && endp > m->coldp &&
19155845Sbostic 			(endp = slow(m, m->coldp, endp-1, gf, gl)) != NULL) {
19255845Sbostic 			/* try it on a shorter possibility */
19355845Sbostic 			for (i = 1; i <= m->g->nsub; i++) {
19455845Sbostic 				m->pmatch[i].rm_so = -1;
19555845Sbostic 				m->pmatch[i].rm_eo = -1;
19655845Sbostic 			}
19755845Sbostic 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
19855845Sbostic 		}
19955845Sbostic 		assert(dp == NULL || dp == endp);
20055845Sbostic 		if (dp != NULL)		/* found a shorter one */
20155845Sbostic 			break;
20255845Sbostic 
20355845Sbostic 		/* despite initial appearances, there is no match here */
20455845Sbostic 		start = m->coldp + 1;	/* recycle starting later */
20555845Sbostic 		assert(start <= stop);
20655845Sbostic 	}
20755845Sbostic 
20855845Sbostic 	/* fill in the details if requested */
20955845Sbostic 	if (nmatch > 0) {
21055845Sbostic 		pmatch[0].rm_so = m->coldp - m->offp;
21155845Sbostic 		pmatch[0].rm_eo = endp - m->offp;
21255845Sbostic 	}
21355845Sbostic 	if (nmatch > 1) {
21455845Sbostic 		assert(m->pmatch != NULL);
21555845Sbostic 		for (i = 1; i < nmatch; i++)
21655845Sbostic 			if (i <= m->g->nsub)
21755845Sbostic 				pmatch[i] = m->pmatch[i];
21855845Sbostic 			else {
21955845Sbostic 				pmatch[i].rm_so = -1;
22055845Sbostic 				pmatch[i].rm_eo = -1;
22155845Sbostic 			}
22255845Sbostic 	}
22355845Sbostic 
22455845Sbostic 	if (m->pmatch != NULL)
22555845Sbostic 		free((char *)m->pmatch);
22655845Sbostic 	if (m->lastpos != NULL)
22755845Sbostic 		free((char *)m->lastpos);
22855845Sbostic 	STATETEARDOWN(m);
22955845Sbostic 	return(0);
23055845Sbostic }
23155845Sbostic 
23255845Sbostic /*
23355845Sbostic  - dissect - figure out what matched what, no back references
23455845Sbostic  */
23555845Sbostic static uchar *			/* == stop (success) always */
23655845Sbostic dissect(m, start, stop, startst, stopst)
23755845Sbostic register struct match *m;
23855845Sbostic uchar *start;
23955845Sbostic uchar *stop;
24055845Sbostic sopno startst;
24155845Sbostic sopno stopst;
24255845Sbostic {
24355845Sbostic 	register int i;
24455845Sbostic 	register sopno ss;	/* start sop of current subRE */
24555845Sbostic 	register sopno es;	/* end sop of current subRE */
24655845Sbostic 	register uchar *sp;	/* start of string matched by it */
24755845Sbostic 	register uchar *stp;	/* string matched by it cannot pass here */
24855845Sbostic 	register uchar *rest;	/* start of rest of string */
24955845Sbostic 	register uchar *tail;	/* string unmatched by rest of RE */
25055845Sbostic 	register sopno ssub;	/* start sop of subsubRE */
25155845Sbostic 	register sopno esub;	/* end sop of subsubRE */
25255845Sbostic 	register uchar *ssp;	/* start of string matched by subsubRE */
25355845Sbostic 	register uchar *sep;	/* end of string matched by subsubRE */
25455845Sbostic 	register uchar *oldssp;	/* previous ssp */
25555845Sbostic 	register uchar *dp;
25655845Sbostic 	register size_t len;
25755845Sbostic 
25855845Sbostic 	DO("diss", start, stop, startst, stopst);
25955845Sbostic 	sp = start;
26055845Sbostic 	for (ss = startst; ss < stopst; ss = es) {
26155845Sbostic 		/* identify end of subRE */
26255845Sbostic 		es = ss;
26355845Sbostic 		switch (OP(m->g->strip[es])) {
26455845Sbostic 		case OPLUS_:
26555845Sbostic 		case OQUEST_:
26655845Sbostic 			es += OPND(m->g->strip[es]);
26755845Sbostic 			break;
26855845Sbostic 		case OCH_:
26955845Sbostic 			while (OP(m->g->strip[es]) != O_CH)
27055845Sbostic 				es += OPND(m->g->strip[es]);
27155845Sbostic 			break;
27255845Sbostic 		}
27355845Sbostic 		es++;
27455845Sbostic 
27555845Sbostic 		/* figure out what it matched */
27655845Sbostic 		switch (OP(m->g->strip[ss])) {
27755845Sbostic 		case OEND:
27855845Sbostic 			assert(0);
27955845Sbostic 			break;
28055845Sbostic 		case OCHAR:
28155845Sbostic 			sp++;
28255845Sbostic 			break;
28355845Sbostic 		case OBOL:
28455845Sbostic 		case OEOL:
28555845Sbostic 			break;
28655845Sbostic 		case OANY:
28755845Sbostic 		case OANYOF:
28855845Sbostic 			sp++;
28955845Sbostic 			break;
29055845Sbostic 		case OBACK_:
29155845Sbostic 		case O_BACK:
29255845Sbostic 			assert(0);
29355845Sbostic 			break;
29455845Sbostic 		/* cases where length of match is hard to find */
29555845Sbostic 		case OQUEST_:
29655845Sbostic 			stp = stop;
29755845Sbostic 			for (;;) {
29855845Sbostic 				/* how long could this one be? */
29955845Sbostic 				rest = slow(m, sp, stp, ss, es);
30055845Sbostic 				assert(rest != NULL);	/* it did match */
30155845Sbostic 				/* could the rest match the rest? */
30255845Sbostic 				tail = slow(m, rest, stop, es, stopst);
30355845Sbostic 				if (tail == stop)
30455845Sbostic 					break;		/* yes! */
30555845Sbostic 				/* no -- try a shorter match for this one */
30655845Sbostic 				stp = rest - 1;
30755845Sbostic 				assert(stp >= sp);	/* it did work */
30855845Sbostic 			}
30955845Sbostic 			ssub = ss + 1;
31055845Sbostic 			esub = es - 1;
31155845Sbostic 			/* did innards match? */
31255845Sbostic 			if (slow(m, sp, rest, ssub, esub) != NULL) {
31355845Sbostic 				dp = dissect(m, sp, rest, ssub, esub);
31455845Sbostic 				assert(dp == rest);
31555845Sbostic 			} else		/* no */
31655845Sbostic 				assert(sp == rest);
31755845Sbostic 			sp = rest;
31855845Sbostic 			break;
31955845Sbostic 		case OPLUS_:
32055845Sbostic 			stp = stop;
32155845Sbostic 			for (;;) {
32255845Sbostic 				/* how long could this one be? */
32355845Sbostic 				rest = slow(m, sp, stp, ss, es);
32455845Sbostic 				assert(rest != NULL);	/* it did match */
32555845Sbostic 				/* could the rest match the rest? */
32655845Sbostic 				tail = slow(m, rest, stop, es, stopst);
32755845Sbostic 				if (tail == stop)
32855845Sbostic 					break;		/* yes! */
32955845Sbostic 				/* no -- try a shorter match for this one */
33055845Sbostic 				stp = rest - 1;
33155845Sbostic 				assert(stp >= sp);	/* it did work */
33255845Sbostic 			}
33355845Sbostic 			ssub = ss + 1;
33455845Sbostic 			esub = es - 1;
33555845Sbostic 			ssp = sp;
33655845Sbostic 			oldssp = ssp;
33755845Sbostic 			for (;;) {	/* find last match of innards */
33855845Sbostic 				sep = slow(m, ssp, rest, ssub, esub);
33955845Sbostic 				if (sep == NULL || sep == ssp)
34055845Sbostic 					break;	/* failed or matched null */
34155845Sbostic 				oldssp = ssp;	/* on to next try */
34255845Sbostic 				ssp = sep;
34355845Sbostic 			}
34455845Sbostic 			if (sep == NULL) {
34555845Sbostic 				/* last successful match */
34655845Sbostic 				sep = ssp;
34755845Sbostic 				ssp = oldssp;
34855845Sbostic 			}
34955845Sbostic 			assert(sep == rest);	/* must exhaust substring */
35055845Sbostic 			assert(slow(m, ssp, sep, ssub, esub) == rest);
35155845Sbostic 			dp = dissect(m, ssp, sep, ssub, esub);
35255845Sbostic 			assert(dp == sep);
35355845Sbostic 			sp = rest;
35455845Sbostic 			break;
35555845Sbostic 		case OCH_:
35655845Sbostic 			stp = stop;
35755845Sbostic 			for (;;) {
35855845Sbostic 				/* how long could this one be? */
35955845Sbostic 				rest = slow(m, sp, stp, ss, es);
36055845Sbostic 				assert(rest != NULL);	/* it did match */
36155845Sbostic 				/* could the rest match the rest? */
36255845Sbostic 				tail = slow(m, rest, stop, es, stopst);
36355845Sbostic 				if (tail == stop)
36455845Sbostic 					break;		/* yes! */
36555845Sbostic 				/* no -- try a shorter match for this one */
36655845Sbostic 				stp = rest - 1;
36755845Sbostic 				assert(stp >= sp);	/* it did work */
36855845Sbostic 			}
36955845Sbostic 			ssub = ss + 1;
37055845Sbostic 			esub = ss + OPND(m->g->strip[ss]) - 1;
37155845Sbostic 			assert(OP(m->g->strip[esub]) == OOR1);
37255845Sbostic 			for (;;) {	/* find first matching branch */
37355845Sbostic 				if (slow(m, sp, rest, ssub, esub) == rest)
37455845Sbostic 					break;	/* it matched all of it */
37555845Sbostic 				/* that one missed, try next one */
37655845Sbostic 				assert(OP(m->g->strip[esub]) == OOR1);
37755845Sbostic 				esub++;
37855845Sbostic 				assert(OP(m->g->strip[esub]) == OOR2);
37955845Sbostic 				ssub = esub + 1;
38055845Sbostic 				esub += OPND(m->g->strip[esub]);
38155845Sbostic 				if (OP(m->g->strip[esub]) == OOR2)
38255845Sbostic 					esub--;
38355845Sbostic 				else
38455845Sbostic 					assert(OP(m->g->strip[esub]) == O_CH);
38555845Sbostic 			}
38655845Sbostic 			dp = dissect(m, sp, rest, ssub, esub);
38755845Sbostic 			assert(dp == rest);
38855845Sbostic 			sp = rest;
38955845Sbostic 			break;
39055845Sbostic 		case O_PLUS:
39155845Sbostic 		case O_QUEST:
39255845Sbostic 		case OOR1:
39355845Sbostic 		case OOR2:
39455845Sbostic 		case O_CH:
39555845Sbostic 			assert(0);
39655845Sbostic 			break;
39755845Sbostic 		case OLPAREN:
39855845Sbostic 			i = OPND(m->g->strip[ss]);
39955845Sbostic 			m->pmatch[i].rm_so = sp - m->offp;
40055845Sbostic 			break;
40155845Sbostic 		case ORPAREN:
40255845Sbostic 			i = OPND(m->g->strip[ss]);
40355845Sbostic 			m->pmatch[i].rm_eo = sp - m->offp;
40455845Sbostic 			break;
40555845Sbostic 		default:		/* uh oh */
40655845Sbostic 			assert(0);
40755845Sbostic 			break;
40855845Sbostic 		}
40955845Sbostic 	}
41055845Sbostic 
41155845Sbostic 	assert(sp == stop);
41255845Sbostic 	return(sp);
41355845Sbostic }
41455845Sbostic 
41555845Sbostic /*
41655845Sbostic  - backref - figure out what matched what, figuring in back references
41755845Sbostic  */
41855845Sbostic static uchar *			/* == stop (success) or NULL (failure) */
41955845Sbostic backref(m, start, stop, startst, stopst, lev)
42055845Sbostic register struct match *m;
42155845Sbostic uchar *start;
42255845Sbostic uchar *stop;
42355845Sbostic sopno startst;
42455845Sbostic sopno stopst;
42555845Sbostic sopno lev;			/* PLUS nesting level */
42655845Sbostic {
42755845Sbostic 	register int i;
42855845Sbostic 	register sopno ss;	/* start sop of current subRE */
42955845Sbostic 	register uchar *sp;	/* start of string matched by it */
43055845Sbostic 	register sopno ssub;	/* start sop of subsubRE */
43155845Sbostic 	register sopno esub;	/* end sop of subsubRE */
43255845Sbostic 	register uchar *ssp;	/* start of string matched by subsubRE */
43355845Sbostic 	register uchar *dp;
43455845Sbostic 	register size_t len;
43555845Sbostic 	register int hard;
43655845Sbostic 	register sop s;
43755845Sbostic 	register regoff_t offsave;
43855845Sbostic 	register cset *cs;
43955845Sbostic 
44055845Sbostic 	DO("back", start, stop, startst, stopst);
44155845Sbostic 	sp = start;
44255845Sbostic 
44355845Sbostic 	/* get as far as we can with easy stuff */
44455845Sbostic 	hard = 0;
44555845Sbostic 	for (ss = startst; !hard && ss < stopst; ss++)
44655845Sbostic 		switch (OP(s = m->g->strip[ss])) {
44755845Sbostic 		case OCHAR:
44855845Sbostic 			if (sp == stop || *sp++ != OPND(s))
44955845Sbostic 				return(NULL);
45055845Sbostic 			break;
45155845Sbostic 		case OANY:
45255845Sbostic 			if (sp == stop)
45355845Sbostic 				return(NULL);
45455845Sbostic 			sp++;
45555845Sbostic 			break;
45655845Sbostic 		case OANYOF:
45755845Sbostic 			cs = &m->g->sets[OPND(s)];
45855845Sbostic 			if (sp == stop || !CHIN(cs, *sp++))
45955845Sbostic 				return(NULL);
46055845Sbostic 			break;
46155845Sbostic 		case OBOL:
46255845Sbostic 			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
46355845Sbostic 					(sp < m->endp && *(sp-1) == '\n' &&
46455845Sbostic 						(m->g->cflags&REG_NEWLINE)) )
46555845Sbostic 				{ /* yes */ }
46655845Sbostic 			else
46755845Sbostic 				return(NULL);
46855845Sbostic 			break;
46955845Sbostic 		case OEOL:
47055845Sbostic 			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
47155845Sbostic 					(sp < m->endp && *sp == '\n' &&
47255845Sbostic 						(m->g->cflags&REG_NEWLINE)) )
47355845Sbostic 				{ /* yes */ }
47455845Sbostic 			else
47555845Sbostic 				return(NULL);
47655845Sbostic 			break;
47755845Sbostic 		case O_QUEST:
47855845Sbostic 			break;
47955845Sbostic 		case OOR1:	/* matches null but needs to skip */
48055845Sbostic 			ss++;
48155845Sbostic 			s = m->g->strip[ss];
48255845Sbostic 			do {
48355845Sbostic 				assert(OP(s) == OOR2);
48455845Sbostic 				ss += OPND(s);
48555845Sbostic 			} while (OP(s = m->g->strip[ss]) != O_CH);
48655845Sbostic 			/* note that the ss++ gets us past the O_CH */
48755845Sbostic 			break;
48855845Sbostic 		default:	/* have to make a choice */
48955845Sbostic 			hard = 1;
49055845Sbostic 			break;
49155845Sbostic 		}
49255845Sbostic 	if (!hard) {		/* that was it! */
49355845Sbostic 		if (sp != stop)
49455845Sbostic 			return(NULL);
49555845Sbostic 		return(sp);
49655845Sbostic 	}
49755845Sbostic 	ss--;			/* adjust for the for's final increment */
49855845Sbostic 
49955845Sbostic 	/* the hard stuff */
50055845Sbostic 	DO("hard", sp, stop, ss, stopst);
50155845Sbostic 	s = m->g->strip[ss];
50255845Sbostic 	switch (OP(s)) {
50355845Sbostic 	case OBACK_:		/* the vilest depths */
50455845Sbostic 		i = OPND(s);
50555845Sbostic 		if (m->pmatch[i].rm_eo == -1)
50655845Sbostic 			return(NULL);
50755845Sbostic 		assert(m->pmatch[i].rm_so != -1);
50855845Sbostic 		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
50955845Sbostic 		assert(stop - m->beginp >= len);
51055845Sbostic 		if (sp > stop - len)
51155845Sbostic 			return(NULL);	/* not enough left to match */
51255845Sbostic 		ssp = m->offp + m->pmatch[i].rm_so;
51355845Sbostic 		if (strncmp((char *)sp, (char *)ssp, len) != 0)
51455845Sbostic 			return(NULL);
51555845Sbostic 		while (m->g->strip[ss] != SOP(O_BACK, i))
51655845Sbostic 			ss++;
51755845Sbostic 		return(backref(m, sp+len, stop, ss+1, stopst, lev));
51855845Sbostic 		break;
51955845Sbostic 	case OQUEST_:		/* to null or not */
52055845Sbostic 		dp = backref(m, sp, stop, ss+1, stopst, lev);
52155845Sbostic 		if (dp != NULL)
52255845Sbostic 			return(dp);	/* not */
52355845Sbostic 		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
52455845Sbostic 		break;
52555845Sbostic 	case OPLUS_:
52655845Sbostic 		assert(m->lastpos != NULL);
52755845Sbostic 		assert(lev+1 <= m->g->nplus);
52855845Sbostic 		m->lastpos[lev+1] = sp;
52955845Sbostic 		return(backref(m, sp, stop, ss+1, stopst, lev+1));
53055845Sbostic 		break;
53155845Sbostic 	case O_PLUS:
53255845Sbostic 		if (sp == m->lastpos[lev])	/* last pass matched null */
53355845Sbostic 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
53455845Sbostic 		/* try another pass */
53555845Sbostic 		m->lastpos[lev] = sp;
53655845Sbostic 		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
53755845Sbostic 		if (dp == NULL)
53855845Sbostic 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
53955845Sbostic 		else
54055845Sbostic 			return(dp);
54155845Sbostic 		break;
54255845Sbostic 	case OCH_:		/* find the right one, if any */
54355845Sbostic 		ssub = ss + 1;
54455845Sbostic 		esub = ss + OPND(s) - 1;
54555845Sbostic 		assert(OP(m->g->strip[esub]) == OOR1);
54655845Sbostic 		for (;;) {	/* find first matching branch */
54755845Sbostic 			dp = backref(m, sp, stop, ssub, esub, lev);
54855845Sbostic 			if (dp != NULL)
54955845Sbostic 				return(dp);
55055845Sbostic 			/* that one missed, try next one */
55155845Sbostic 			if (OP(m->g->strip[esub]) == O_CH)
55255845Sbostic 				return(NULL);	/* there is none */
55355845Sbostic 			esub++;
55455845Sbostic 			assert(OP(m->g->strip[esub]) == OOR2);
55555845Sbostic 			ssub = esub + 1;
55655845Sbostic 			esub += OPND(m->g->strip[esub]);
55755845Sbostic 			if (OP(m->g->strip[esub]) == OOR2)
55855845Sbostic 				esub--;
55955845Sbostic 			else
56055845Sbostic 				assert(OP(m->g->strip[esub]) == O_CH);
56155845Sbostic 		}
56255845Sbostic 		break;
56355845Sbostic 	case OLPAREN:		/* must undo assignment if rest fails */
56455845Sbostic 		i = OPND(s);
56555845Sbostic 		offsave = m->pmatch[i].rm_so;
56655845Sbostic 		m->pmatch[i].rm_so = sp - m->offp;
56755845Sbostic 		dp = backref(m, sp, stop, ss+1, stopst, lev);
56855845Sbostic 		if (dp != NULL)
56955845Sbostic 			return(dp);
57055845Sbostic 		m->pmatch[i].rm_so = offsave;
57155845Sbostic 		return(NULL);
57255845Sbostic 		break;
57355845Sbostic 	case ORPAREN:		/* must undo assignment if rest fails */
57455845Sbostic 		i = OPND(s);
57555845Sbostic 		offsave = m->pmatch[i].rm_eo;
57655845Sbostic 		m->pmatch[i].rm_eo = sp - m->offp;
57755845Sbostic 		dp = backref(m, sp, stop, ss+1, stopst, lev);
57855845Sbostic 		if (dp != NULL)
57955845Sbostic 			return(dp);
58055845Sbostic 		m->pmatch[i].rm_eo = offsave;
58155845Sbostic 		return(NULL);
58255845Sbostic 		break;
58355845Sbostic 	default:		/* uh oh */
58455845Sbostic 		assert(0);
58555845Sbostic 		break;
58655845Sbostic 	}
58755845Sbostic 
58855845Sbostic 	/* "can't happen" */
58955845Sbostic 	assert(0);
59055845Sbostic 	/* NOTREACHED */
59155845Sbostic }
59255845Sbostic 
59355845Sbostic /*
59455845Sbostic  - fast - step through the string at top speed
59555845Sbostic  */
59655845Sbostic static uchar *			/* where tentative match ended, or NULL */
59755845Sbostic fast(m, start, stop, startst, stopst)
59855845Sbostic register struct match *m;
59955845Sbostic uchar *start;
60055845Sbostic uchar *stop;
60155845Sbostic sopno startst;
60255845Sbostic sopno stopst;
60355845Sbostic {
60455845Sbostic 	register states st = m->st;
60555845Sbostic 	register states fresh = m->fresh;
60655845Sbostic 	register states tmp = m->tmp;
60755845Sbostic 	register uchar *p = start;
60855845Sbostic 	register uchar c;
60955845Sbostic 	register uchar lastc;	/* previous c */
61055845Sbostic 	register int atbol;
61155845Sbostic 	register int ateol;
61255845Sbostic 	register uchar *coldp;	/* last p after which no match was underway */
61355845Sbostic 
61455845Sbostic 	CLEAR(st);
61555845Sbostic 	SET1(st, startst);
61655845Sbostic 	st = expand(m->g, startst, stopst, st, 0, 0);
61755845Sbostic 	ASSIGN(fresh, st);
61855845Sbostic 	SP("start", st, *p);
61955845Sbostic 	c = '\0';
62055845Sbostic 	coldp = NULL;
62155845Sbostic 	for (;;) {
62255845Sbostic 		/* next character */
62355845Sbostic 		lastc = c;
62455845Sbostic 		c = (p == m->endp) ? '\0' : *p;
62555845Sbostic 		if (EQ(st, fresh))
62655845Sbostic 			coldp = p;
62755845Sbostic 
62855845Sbostic 		/* is there an EOL and/or BOL between lastc and c? */
62955845Sbostic 		atbol = ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
63055845Sbostic 				(lastc == '\0' && !(m->eflags&REG_NOTBOL)) );
63155845Sbostic 		ateol = ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
63255845Sbostic 				(c == '\0' && !(m->eflags&REG_NOTEOL)) );
63355845Sbostic 		if (atbol || ateol) {
63455845Sbostic 			st = expand(m->g, startst, stopst, st, atbol, ateol);
63555845Sbostic 			SP("bef", st, c);
63655845Sbostic 		}
63755845Sbostic 
63855845Sbostic 		/* are we done? */
63955845Sbostic 		if (ISSET(st, stopst) || p == stop)
64055845Sbostic 			break;		/* NOTE BREAK OUT */
64155845Sbostic 
64255845Sbostic 		/* no, we must deal with this character */
64355845Sbostic 		ASSIGN(tmp, st);
64455845Sbostic 		ASSIGN(st, fresh);
64555845Sbostic 		st = step(m->g, startst, stopst, tmp, c, st);
64655845Sbostic 		SP("aft", st, c);
64755845Sbostic 		assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st));
64855845Sbostic 		p++;
64955845Sbostic 	}
65055845Sbostic 
65155845Sbostic 	assert(coldp != NULL);
65255845Sbostic 	m->coldp = coldp;
65355845Sbostic 	if (ISSET(st, stopst))
65455845Sbostic 		return(p+1);
65555845Sbostic 	else
65655845Sbostic 		return(NULL);
65755845Sbostic }
65855845Sbostic 
65955845Sbostic /*
66055845Sbostic  - slow - step through the string more deliberately
66155845Sbostic  */
66255845Sbostic static uchar *			/* where it ended */
66355845Sbostic slow(m, start, stop, startst, stopst)
66455845Sbostic register struct match *m;
66555845Sbostic uchar *start;
66655845Sbostic uchar *stop;
66755845Sbostic sopno startst;
66855845Sbostic sopno stopst;
66955845Sbostic {
67055845Sbostic 	register states st = m->st;
67155845Sbostic 	register states empty = m->empty;
67255845Sbostic 	register states tmp = m->tmp;
67355845Sbostic 	register uchar *p = start;
67455845Sbostic 	register uchar c = (start == m->beginp) ? '\0' : *(start-1);
67555845Sbostic 	register uchar lastc;	/* previous c */
67655845Sbostic 	register int atbol;
67755845Sbostic 	register int ateol;
67855845Sbostic 	register uchar *matchp;	/* last p at which a match ended */
67955845Sbostic 
68055845Sbostic 	DO("slow", start, stop, startst, stopst);
68155845Sbostic 	CLEAR(st);
68255845Sbostic 	SET1(st, startst);
68355845Sbostic 	SP("sstart", st, *p);
68455845Sbostic 	matchp = NULL;
68555845Sbostic 	for (;;) {
68655845Sbostic 		/* next character */
68755845Sbostic 		lastc = c;
68855845Sbostic 		c = (p == m->endp) ? '\0' : *p;
68955845Sbostic 
69055845Sbostic 		/* is there an EOL and/or BOL between lastc and c? */
69155845Sbostic 		atbol = ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
69255845Sbostic 				(lastc == '\0' && !(m->eflags&REG_NOTBOL)) );
69355845Sbostic 		ateol = ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
69455845Sbostic 				(c == '\0' && !(m->eflags&REG_NOTEOL)) );
69555845Sbostic 
69655845Sbostic 		/* do we need an expansion before looking at the char? */
69755845Sbostic 		if (p == start || atbol || ateol) {
69855845Sbostic 			st = expand(m->g, startst, stopst, st, atbol, ateol);
69955845Sbostic 			SP("sbef", st, c);
70055845Sbostic 		}
70155845Sbostic 
70255845Sbostic 		/* are we done? */
70355845Sbostic 		if (ISSET(st, stopst))
70455845Sbostic 			matchp = p;
70555845Sbostic 		if (EQ(st, empty) || p == stop)
70655845Sbostic 			break;		/* NOTE BREAK OUT */
70755845Sbostic 
70855845Sbostic 		/* no, we must deal with this character */
70955845Sbostic 		ASSIGN(tmp, st);
71055845Sbostic 		ASSIGN(st, empty);
71155845Sbostic 		st = step(m->g, startst, stopst, tmp, c, st);
71255845Sbostic 		SP("saft", st, c);
71355845Sbostic 		assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st));
71455845Sbostic 		p++;
71555845Sbostic 	}
71655845Sbostic 
71755845Sbostic 	return(matchp);
71855845Sbostic }
71955845Sbostic 
72055845Sbostic 
72155845Sbostic /*
72255845Sbostic  - expand - return set of states reachable from an initial set
72355845Sbostic  */
72455845Sbostic static states
72555845Sbostic expand(g, start, stop, st, atbol, ateol)
72655845Sbostic register struct re_guts *g;
72755845Sbostic int start;			/* start state within strip */
72855845Sbostic int stop;			/* state after stop state within strip */
72955845Sbostic register states st;
73055845Sbostic int atbol;			/* at start or just after \n? (for BOL) */
73155845Sbostic int ateol;			/* just before \n or \0? (for EOL) */
73255845Sbostic {
73355845Sbostic 	register sop s;
73455845Sbostic 	register sopno pc;
73555845Sbostic 	register onestate here;		/* note, macros know this name */
73655845Sbostic 	register sopno look;
73755845Sbostic 	register int i;
73855845Sbostic 
73955845Sbostic 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
74055845Sbostic 		s = g->strip[pc];
74155845Sbostic 		switch (OP(s)) {
74255845Sbostic 		case OEND:
74355845Sbostic 			assert(pc == stop-1);
74455845Sbostic 			break;
74555845Sbostic 		case OCHAR:		/* can't get past this */
74655845Sbostic 			break;
74755845Sbostic 		case OBOL:
74855845Sbostic 			if (atbol)
74955845Sbostic 				FWD(st, st, 1);
75055845Sbostic 			break;
75155845Sbostic 		case OEOL:
75255845Sbostic 			if (ateol)
75355845Sbostic 				FWD(st, st, 1);
75455845Sbostic 			break;
75555845Sbostic 		case OANY:		/* can't get past this either */
75655845Sbostic 			break;
75755845Sbostic 		case OANYOF:		/* or this */
75855845Sbostic 			break;
75955845Sbostic 		case OBACK_:		/* not significant here */
76055845Sbostic 		case O_BACK:
76155845Sbostic 			FWD(st, st, 1);
76255845Sbostic 			break;
76355845Sbostic 		case OPLUS_:		/* forward, this is just an empty */
76455845Sbostic 			FWD(st, st, 1);
76555845Sbostic 			break;
76655845Sbostic 		case O_PLUS:		/* both forward and back */
76755845Sbostic 			FWD(st, st, 1);
76855845Sbostic 			i = ISSETBACK(st, OPND(s));
76955845Sbostic 			BACK(st, st, OPND(s));
77055845Sbostic 			if (!i && ISSETBACK(st, OPND(s))) {
77155845Sbostic 				/* oho, must reconsider loop body */
77255845Sbostic 				pc -= OPND(s) + 1;
77355845Sbostic 				INIT(here, pc);
77455845Sbostic 			}
77555845Sbostic 			break;
77655845Sbostic 		case OQUEST_:		/* two branches, both forward */
77755845Sbostic 			FWD(st, st, 1);
77855845Sbostic 			FWD(st, st, OPND(s));
77955845Sbostic 			break;
78055845Sbostic 		case O_QUEST:		/* just an empty */
78155845Sbostic 			FWD(st, st, 1);
78255845Sbostic 			break;
78355845Sbostic 		case OLPAREN:		/* not significant here */
78455845Sbostic 		case ORPAREN:
78555845Sbostic 			FWD(st, st, 1);
78655845Sbostic 			break;
78755845Sbostic 		case OCH_:		/* mark the first two branches */
78855845Sbostic 			FWD(st, st, 1);
78955845Sbostic 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
79055845Sbostic 			FWD(st, st, OPND(s));
79155845Sbostic 			break;
79255845Sbostic 		case OOR1:		/* done a branch, find the O_CH */
79355845Sbostic 			if (ISSTATEIN(st, here)) {
79455845Sbostic 				for (look = 1;
79555845Sbostic 						OP(s = g->strip[pc+look]) != O_CH;
79655845Sbostic 						look += OPND(s))
79755845Sbostic 					assert(OP(s) == OOR2);
79855845Sbostic 				FWD(st, st, look);
79955845Sbostic 			}
80055845Sbostic 			break;
80155845Sbostic 		case OOR2:		/* propagate OCH_'s marking onward */
80255845Sbostic 			FWD(st, st, 1);
80355845Sbostic 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
80455845Sbostic 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
80555845Sbostic 				FWD(st, st, OPND(s));
80655845Sbostic 			}
80755845Sbostic 			break;
80855845Sbostic 		case O_CH:		/* just an empty */
80955845Sbostic 			FWD(st, st, 1);
81055845Sbostic 			break;
81155845Sbostic 		default:		/* ooooops... */
81255845Sbostic 			assert(0);
81355845Sbostic 			break;
81455845Sbostic 		}
81555845Sbostic 	}
81655845Sbostic 
81755845Sbostic 	return(st);
81855845Sbostic }
81955845Sbostic 
82055845Sbostic /*
82155845Sbostic  - step - map set of states reachable before char to set reachable after
82255845Sbostic  */
82355845Sbostic static states
82455845Sbostic step(g, start, stop, bef, ch, aft)
82555845Sbostic register struct re_guts *g;
82655845Sbostic int start;			/* start state within strip */
82755845Sbostic int stop;			/* state after stop state within strip */
82855845Sbostic register states bef;		/* states reachable before */
82955845Sbostic uchar ch;
83055845Sbostic register states aft;		/* states already known reachable after */
83155845Sbostic {
83255845Sbostic 	register cset *cs;
83355845Sbostic 	register sop s;
83455845Sbostic 	register sopno pc;
83555845Sbostic 	register onestate here;		/* note, macros know this name */
83655845Sbostic 	register sopno look;
83755845Sbostic 	register int i;
83855845Sbostic 
83955845Sbostic 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
84055845Sbostic 		s = g->strip[pc];
84155845Sbostic 		switch (OP(s)) {
84255845Sbostic 		case OEND:
84355845Sbostic 			assert(pc == stop-1);
84455845Sbostic 			break;
84555845Sbostic 		case OCHAR:
84655845Sbostic 			if (ch == OPND(s))
84755845Sbostic 				FWD(aft, bef, 1);
84855845Sbostic 			break;
84955845Sbostic 		case OBOL:	/* these are looked after by expand() */
85055845Sbostic 		case OEOL:
85155845Sbostic 			break;
85255845Sbostic 		case OANY:
85355845Sbostic 			FWD(aft, bef, 1);
85455845Sbostic 			break;
85555845Sbostic 		case OANYOF:
85655845Sbostic 			cs = &g->sets[OPND(s)];
85755845Sbostic 			if (CHIN(cs, ch))
85855845Sbostic 				FWD(aft, bef, 1);
85955845Sbostic 			break;
86055845Sbostic 		case OBACK_:		/* ignored here */
86155845Sbostic 		case O_BACK:
86255845Sbostic 			FWD(aft, aft, 1);
86355845Sbostic 			break;
86455845Sbostic 		case OPLUS_:		/* forward, this is just an empty */
86555845Sbostic 			FWD(aft, aft, 1);
86655845Sbostic 			break;
86755845Sbostic 		case O_PLUS:		/* both forward and back */
86855845Sbostic 			FWD(aft, aft, 1);
86955845Sbostic 			i = ISSETBACK(aft, OPND(s));
87055845Sbostic 			BACK(aft, aft, OPND(s));
87155845Sbostic 			if (!i && ISSETBACK(aft, OPND(s))) {
87255845Sbostic 				/* oho, must reconsider loop body */
87355845Sbostic 				pc -= OPND(s) + 1;
87455845Sbostic 				INIT(here, pc);
87555845Sbostic 			}
87655845Sbostic 			break;
87755845Sbostic 		case OQUEST_:		/* two branches, both forward */
87855845Sbostic 			FWD(aft, aft, 1);
87955845Sbostic 			FWD(aft, aft, OPND(s));
88055845Sbostic 			break;
88155845Sbostic 		case O_QUEST:		/* just an empty */
88255845Sbostic 			FWD(aft, aft, 1);
88355845Sbostic 			break;
88455845Sbostic 		case OLPAREN:		/* not significant here */
88555845Sbostic 		case ORPAREN:
88655845Sbostic 			FWD(aft, aft, 1);
88755845Sbostic 			break;
88855845Sbostic 		case OCH_:		/* mark the first two branches */
88955845Sbostic 			FWD(aft, aft, 1);
89055845Sbostic 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
89155845Sbostic 			FWD(aft, aft, OPND(s));
89255845Sbostic 			break;
89355845Sbostic 		case OOR1:		/* done a branch, find the O_CH */
89455845Sbostic 			if (ISSTATEIN(aft, here)) {
89555845Sbostic 				for (look = 1;
89655845Sbostic 						OP(s = g->strip[pc+look]) != O_CH;
89755845Sbostic 						look += OPND(s))
89855845Sbostic 					assert(OP(s) == OOR2);
89955845Sbostic 				FWD(aft, aft, look);
90055845Sbostic 			}
90155845Sbostic 			break;
90255845Sbostic 		case OOR2:		/* propagate OCH_'s marking */
90355845Sbostic 			FWD(aft, aft, 1);
90455845Sbostic 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
90555845Sbostic 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
90655845Sbostic 				FWD(aft, aft, OPND(s));
90755845Sbostic 			}
90855845Sbostic 			break;
90955845Sbostic 		case O_CH:		/* just empty */
91055845Sbostic 			FWD(aft, aft, 1);
91155845Sbostic 			break;
91255845Sbostic 		default:		/* ooooops... */
91355845Sbostic 			assert(0);
91455845Sbostic 			break;
91555845Sbostic 		}
91655845Sbostic 	}
91755845Sbostic 
91855845Sbostic 	return(aft);
91955845Sbostic }
92055845Sbostic 
92155845Sbostic #ifndef NDEBUG
92255845Sbostic /*
92355845Sbostic  - print - print a set of states
92455845Sbostic  */
92555845Sbostic static void
92655845Sbostic print(g, caption, st, ch, d)
92755845Sbostic struct re_guts *g;
92855845Sbostic char *caption;
92955845Sbostic states st;
93055845Sbostic uchar ch;
93155845Sbostic FILE *d;
93255845Sbostic {
93355845Sbostic 	register int i;
93455845Sbostic 	register int first = 1;
93555845Sbostic 
93655845Sbostic 	fprintf(d, "%s", caption);
93755845Sbostic 	if (ch != '\0')
93855845Sbostic 		fprintf(d, " %s", regchar(ch));
93955845Sbostic 	for (i = 0; i < g->nstates; i++)
94055845Sbostic 		if (ISSET(st, i)) {
94155845Sbostic 			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
94255845Sbostic 			first = 0;
94355845Sbostic 		}
94455845Sbostic 	fprintf(d, "\n");
94555845Sbostic }
94655845Sbostic #endif
94755845Sbostic 
94855845Sbostic #undef	matcher
94955845Sbostic #undef	fast
95055845Sbostic #undef	slow
95155845Sbostic #undef	dissect
95255845Sbostic #undef	backref
95355845Sbostic #undef	expand
95455845Sbostic #undef	step
95555845Sbostic #undef	print
95655845Sbostic #undef	match
957