xref: /csrg-svn/lib/libc/regex/engine.c (revision 55845)
1 /*-
2  * Copyright (c) 1992 Henry Spencer.
3  * Copyright (c) 1992 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Henry Spencer of the University of Toronto.
8  *
9  * %sccs.include.redist.c%
10  *
11  *	@(#)engine.c	5.1 (Berkeley) 08/06/92
12  */
13 
14 /*
15  * The matching engine and friends.  This file is #included by regexec.c
16  * after suitable #defines of a variety of macros used herein, so that
17  * different state representations can be used without duplicating masses
18  * of code.
19  */
20 
21 #ifdef SNAMES
22 #define	matcher	smatcher
23 #define	fast	sfast
24 #define	slow	sslow
25 #define	dissect	sdissect
26 #define	backref	sbackref
27 #define	expand	sexpand
28 #define	step	sstep
29 #define	print	sprint
30 #define	match	smat
31 #endif
32 #ifdef LNAMES
33 #define	matcher	lmatcher
34 #define	fast	lfast
35 #define	slow	lslow
36 #define	dissect	ldissect
37 #define	backref	lbackref
38 #define	expand	lexpand
39 #define	step	lstep
40 #define	print	lprint
41 #define	match	lmat
42 #endif
43 
44 /* another structure passed up and down to avoid zillions of parameters */
45 struct match {
46 	struct re_guts *g;
47 	int eflags;
48 	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
49 	uchar *offp;		/* offsets work from here */
50 	uchar *beginp;		/* start of string -- virtual NUL precedes */
51 	uchar *endp;		/* end of string -- virtual NUL here */
52 	uchar *coldp;		/* can be no match starting before here */
53 	uchar **lastpos;	/* [nplus+1] */
54 	STATEVARS;
55 	states st;		/* current states */
56 	states fresh;		/* states for a fresh start */
57 	states tmp;		/* temporary */
58 	states empty;		/* empty set of states */
59 };
60 
61 #ifndef NDEBUG
62 STATIC void print();
63 extern char *regchar();
64 #define	SP(t, s, c)	{ if (m->eflags&REG_TRACE) print(m->g, t, s, c, stdout); }
65 #define	DO(t, p1, p2, s1, s2)	{ if (m->eflags&REG_TRACE) { \
66 					printf("%s %s-", t, regchar(*(p1))); \
67 					printf("%s ", regchar(*(p2))); \
68 					printf("%ld-%ld\n", s1, s2); } }
69 #else
70 #define	SP(t, s, c)	/* nothing */
71 #define	DO(t, p1, p2, s1, s2)	/* nothing */
72 #endif
73 
74 STATIC uchar *fast();
75 STATIC uchar *slow();
76 STATIC uchar *dissect();
77 STATIC uchar *backref();
78 STATIC states expand();
79 STATIC states step();
80 
81 /*
82  - matcher - the actual matching engine
83  */
84 static int			/* 0 success, REG_NOMATCH failure */
85 matcher(g, string, nmatch, pmatch, eflags)
86 register struct re_guts *g;
87 uchar *string;
88 size_t nmatch;
89 regmatch_t pmatch[];
90 int eflags;
91 {
92 	register uchar *endp;
93 	register int i;
94 	struct match mv;
95 	register struct match *m = &mv;
96 	register uchar *dp;
97 	const register sopno gf = g->firststate+1;	/* +1 for OEND */
98 	const register sopno gl = g->laststate;
99 	uchar *start;
100 	uchar *stop;
101 
102 	if (g->cflags&REG_NOSUB)
103 		nmatch = 0;		/* simplify tests */
104 	if (eflags&REG_STARTEND) {
105 		start = string + pmatch[0].rm_so;
106 		stop = string + pmatch[0].rm_eo;
107 	} else {
108 		start = string;
109 		stop = start + strlen((char *)start);
110 	}
111 
112 	/* match struct setup */
113 	m->g = g;
114 	m->eflags = eflags;
115 	m->pmatch = NULL;
116 	m->lastpos = NULL;
117 	m->offp = string;
118 	m->beginp = start;
119 	m->endp = stop;
120 	STATESETUP(m, 4);
121 	SETUP(m->st);
122 	SETUP(m->fresh);
123 	SETUP(m->tmp);
124 	SETUP(m->empty);
125 	CLEAR(m->empty);
126 
127 	/* this loop does only one repetition except for backrefs */
128 	for (;;) {
129 		endp = fast(m, start, stop, gf, gl);
130 		if (endp == NULL) {		/* a miss */
131 			STATETEARDOWN(m);
132 			return(REG_NOMATCH);
133 		}
134 		if (nmatch == 0 && !g->backrefs)
135 			break;		/* no further info needed */
136 
137 		/* where? */
138 		assert(m->coldp != NULL);
139 		for (;;) {
140 			endp = slow(m, m->coldp, stop, gf, gl);
141 			if (endp != NULL)
142 				break;
143 			assert(*m->coldp != '\0');
144 			m->coldp++;
145 		}
146 		if (nmatch == 1 && !g->backrefs)
147 			break;		/* no further info needed */
148 
149 		/* oh my, he wants the subexpressions... */
150 		if (m->pmatch == NULL)
151 			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
152 							sizeof(regmatch_t));
153 		if (m->pmatch == NULL) {
154 			STATETEARDOWN(m);
155 			return(REG_ESPACE);
156 		}
157 		for (i = 1; i <= m->g->nsub; i++) {
158 			m->pmatch[i].rm_so = -1;
159 			m->pmatch[i].rm_eo = -1;
160 		}
161 		if (!g->backrefs)
162 			dp = dissect(m, m->coldp, endp, gf, gl);
163 		else {
164 			if (g->nplus > 0 && m->lastpos == NULL)
165 				m->lastpos = (uchar **)malloc((g->nplus+1) *
166 							sizeof(uchar *));
167 			if (g->nplus > 0 && m->lastpos == NULL) {
168 				free(m->pmatch);
169 				STATETEARDOWN(m);
170 				return(REG_ESPACE);
171 			}
172 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
173 		}
174 		if (dp != NULL)
175 			break;
176 
177 		/* uh-oh... we couldn't find a subexpression-level match */
178 		assert(g->backrefs);	/* must be back references doing it */
179 		assert(g->nplus == 0 || m->lastpos != NULL);
180 		while (dp == NULL && endp > m->coldp &&
181 			(endp = slow(m, m->coldp, endp-1, gf, gl)) != NULL) {
182 			/* try it on a shorter possibility */
183 			for (i = 1; i <= m->g->nsub; i++) {
184 				m->pmatch[i].rm_so = -1;
185 				m->pmatch[i].rm_eo = -1;
186 			}
187 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
188 		}
189 		assert(dp == NULL || dp == endp);
190 		if (dp != NULL)		/* found a shorter one */
191 			break;
192 
193 		/* despite initial appearances, there is no match here */
194 		start = m->coldp + 1;	/* recycle starting later */
195 		assert(start <= stop);
196 	}
197 
198 	/* fill in the details if requested */
199 	if (nmatch > 0) {
200 		pmatch[0].rm_so = m->coldp - m->offp;
201 		pmatch[0].rm_eo = endp - m->offp;
202 	}
203 	if (nmatch > 1) {
204 		assert(m->pmatch != NULL);
205 		for (i = 1; i < nmatch; i++)
206 			if (i <= m->g->nsub)
207 				pmatch[i] = m->pmatch[i];
208 			else {
209 				pmatch[i].rm_so = -1;
210 				pmatch[i].rm_eo = -1;
211 			}
212 	}
213 
214 	if (m->pmatch != NULL)
215 		free((char *)m->pmatch);
216 	if (m->lastpos != NULL)
217 		free((char *)m->lastpos);
218 	STATETEARDOWN(m);
219 	return(0);
220 }
221 
222 /*
223  - dissect - figure out what matched what, no back references
224  */
225 static uchar *			/* == stop (success) always */
226 dissect(m, start, stop, startst, stopst)
227 register struct match *m;
228 uchar *start;
229 uchar *stop;
230 sopno startst;
231 sopno stopst;
232 {
233 	register int i;
234 	register sopno ss;	/* start sop of current subRE */
235 	register sopno es;	/* end sop of current subRE */
236 	register uchar *sp;	/* start of string matched by it */
237 	register uchar *stp;	/* string matched by it cannot pass here */
238 	register uchar *rest;	/* start of rest of string */
239 	register uchar *tail;	/* string unmatched by rest of RE */
240 	register sopno ssub;	/* start sop of subsubRE */
241 	register sopno esub;	/* end sop of subsubRE */
242 	register uchar *ssp;	/* start of string matched by subsubRE */
243 	register uchar *sep;	/* end of string matched by subsubRE */
244 	register uchar *oldssp;	/* previous ssp */
245 	register uchar *dp;
246 	register size_t len;
247 
248 	DO("diss", start, stop, startst, stopst);
249 	sp = start;
250 	for (ss = startst; ss < stopst; ss = es) {
251 		/* identify end of subRE */
252 		es = ss;
253 		switch (OP(m->g->strip[es])) {
254 		case OPLUS_:
255 		case OQUEST_:
256 			es += OPND(m->g->strip[es]);
257 			break;
258 		case OCH_:
259 			while (OP(m->g->strip[es]) != O_CH)
260 				es += OPND(m->g->strip[es]);
261 			break;
262 		}
263 		es++;
264 
265 		/* figure out what it matched */
266 		switch (OP(m->g->strip[ss])) {
267 		case OEND:
268 			assert(0);
269 			break;
270 		case OCHAR:
271 			sp++;
272 			break;
273 		case OBOL:
274 		case OEOL:
275 			break;
276 		case OANY:
277 		case OANYOF:
278 			sp++;
279 			break;
280 		case OBACK_:
281 		case O_BACK:
282 			assert(0);
283 			break;
284 		/* cases where length of match is hard to find */
285 		case OQUEST_:
286 			stp = stop;
287 			for (;;) {
288 				/* how long could this one be? */
289 				rest = slow(m, sp, stp, ss, es);
290 				assert(rest != NULL);	/* it did match */
291 				/* could the rest match the rest? */
292 				tail = slow(m, rest, stop, es, stopst);
293 				if (tail == stop)
294 					break;		/* yes! */
295 				/* no -- try a shorter match for this one */
296 				stp = rest - 1;
297 				assert(stp >= sp);	/* it did work */
298 			}
299 			ssub = ss + 1;
300 			esub = es - 1;
301 			/* did innards match? */
302 			if (slow(m, sp, rest, ssub, esub) != NULL) {
303 				dp = dissect(m, sp, rest, ssub, esub);
304 				assert(dp == rest);
305 			} else		/* no */
306 				assert(sp == rest);
307 			sp = rest;
308 			break;
309 		case OPLUS_:
310 			stp = stop;
311 			for (;;) {
312 				/* how long could this one be? */
313 				rest = slow(m, sp, stp, ss, es);
314 				assert(rest != NULL);	/* it did match */
315 				/* could the rest match the rest? */
316 				tail = slow(m, rest, stop, es, stopst);
317 				if (tail == stop)
318 					break;		/* yes! */
319 				/* no -- try a shorter match for this one */
320 				stp = rest - 1;
321 				assert(stp >= sp);	/* it did work */
322 			}
323 			ssub = ss + 1;
324 			esub = es - 1;
325 			ssp = sp;
326 			oldssp = ssp;
327 			for (;;) {	/* find last match of innards */
328 				sep = slow(m, ssp, rest, ssub, esub);
329 				if (sep == NULL || sep == ssp)
330 					break;	/* failed or matched null */
331 				oldssp = ssp;	/* on to next try */
332 				ssp = sep;
333 			}
334 			if (sep == NULL) {
335 				/* last successful match */
336 				sep = ssp;
337 				ssp = oldssp;
338 			}
339 			assert(sep == rest);	/* must exhaust substring */
340 			assert(slow(m, ssp, sep, ssub, esub) == rest);
341 			dp = dissect(m, ssp, sep, ssub, esub);
342 			assert(dp == sep);
343 			sp = rest;
344 			break;
345 		case OCH_:
346 			stp = stop;
347 			for (;;) {
348 				/* how long could this one be? */
349 				rest = slow(m, sp, stp, ss, es);
350 				assert(rest != NULL);	/* it did match */
351 				/* could the rest match the rest? */
352 				tail = slow(m, rest, stop, es, stopst);
353 				if (tail == stop)
354 					break;		/* yes! */
355 				/* no -- try a shorter match for this one */
356 				stp = rest - 1;
357 				assert(stp >= sp);	/* it did work */
358 			}
359 			ssub = ss + 1;
360 			esub = ss + OPND(m->g->strip[ss]) - 1;
361 			assert(OP(m->g->strip[esub]) == OOR1);
362 			for (;;) {	/* find first matching branch */
363 				if (slow(m, sp, rest, ssub, esub) == rest)
364 					break;	/* it matched all of it */
365 				/* that one missed, try next one */
366 				assert(OP(m->g->strip[esub]) == OOR1);
367 				esub++;
368 				assert(OP(m->g->strip[esub]) == OOR2);
369 				ssub = esub + 1;
370 				esub += OPND(m->g->strip[esub]);
371 				if (OP(m->g->strip[esub]) == OOR2)
372 					esub--;
373 				else
374 					assert(OP(m->g->strip[esub]) == O_CH);
375 			}
376 			dp = dissect(m, sp, rest, ssub, esub);
377 			assert(dp == rest);
378 			sp = rest;
379 			break;
380 		case O_PLUS:
381 		case O_QUEST:
382 		case OOR1:
383 		case OOR2:
384 		case O_CH:
385 			assert(0);
386 			break;
387 		case OLPAREN:
388 			i = OPND(m->g->strip[ss]);
389 			m->pmatch[i].rm_so = sp - m->offp;
390 			break;
391 		case ORPAREN:
392 			i = OPND(m->g->strip[ss]);
393 			m->pmatch[i].rm_eo = sp - m->offp;
394 			break;
395 		default:		/* uh oh */
396 			assert(0);
397 			break;
398 		}
399 	}
400 
401 	assert(sp == stop);
402 	return(sp);
403 }
404 
405 /*
406  - backref - figure out what matched what, figuring in back references
407  */
408 static uchar *			/* == stop (success) or NULL (failure) */
409 backref(m, start, stop, startst, stopst, lev)
410 register struct match *m;
411 uchar *start;
412 uchar *stop;
413 sopno startst;
414 sopno stopst;
415 sopno lev;			/* PLUS nesting level */
416 {
417 	register int i;
418 	register sopno ss;	/* start sop of current subRE */
419 	register uchar *sp;	/* start of string matched by it */
420 	register sopno ssub;	/* start sop of subsubRE */
421 	register sopno esub;	/* end sop of subsubRE */
422 	register uchar *ssp;	/* start of string matched by subsubRE */
423 	register uchar *dp;
424 	register size_t len;
425 	register int hard;
426 	register sop s;
427 	register regoff_t offsave;
428 	register cset *cs;
429 
430 	DO("back", start, stop, startst, stopst);
431 	sp = start;
432 
433 	/* get as far as we can with easy stuff */
434 	hard = 0;
435 	for (ss = startst; !hard && ss < stopst; ss++)
436 		switch (OP(s = m->g->strip[ss])) {
437 		case OCHAR:
438 			if (sp == stop || *sp++ != OPND(s))
439 				return(NULL);
440 			break;
441 		case OANY:
442 			if (sp == stop)
443 				return(NULL);
444 			sp++;
445 			break;
446 		case OANYOF:
447 			cs = &m->g->sets[OPND(s)];
448 			if (sp == stop || !CHIN(cs, *sp++))
449 				return(NULL);
450 			break;
451 		case OBOL:
452 			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
453 					(sp < m->endp && *(sp-1) == '\n' &&
454 						(m->g->cflags&REG_NEWLINE)) )
455 				{ /* yes */ }
456 			else
457 				return(NULL);
458 			break;
459 		case OEOL:
460 			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
461 					(sp < m->endp && *sp == '\n' &&
462 						(m->g->cflags&REG_NEWLINE)) )
463 				{ /* yes */ }
464 			else
465 				return(NULL);
466 			break;
467 		case O_QUEST:
468 			break;
469 		case OOR1:	/* matches null but needs to skip */
470 			ss++;
471 			s = m->g->strip[ss];
472 			do {
473 				assert(OP(s) == OOR2);
474 				ss += OPND(s);
475 			} while (OP(s = m->g->strip[ss]) != O_CH);
476 			/* note that the ss++ gets us past the O_CH */
477 			break;
478 		default:	/* have to make a choice */
479 			hard = 1;
480 			break;
481 		}
482 	if (!hard) {		/* that was it! */
483 		if (sp != stop)
484 			return(NULL);
485 		return(sp);
486 	}
487 	ss--;			/* adjust for the for's final increment */
488 
489 	/* the hard stuff */
490 	DO("hard", sp, stop, ss, stopst);
491 	s = m->g->strip[ss];
492 	switch (OP(s)) {
493 	case OBACK_:		/* the vilest depths */
494 		i = OPND(s);
495 		if (m->pmatch[i].rm_eo == -1)
496 			return(NULL);
497 		assert(m->pmatch[i].rm_so != -1);
498 		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
499 		assert(stop - m->beginp >= len);
500 		if (sp > stop - len)
501 			return(NULL);	/* not enough left to match */
502 		ssp = m->offp + m->pmatch[i].rm_so;
503 		if (strncmp((char *)sp, (char *)ssp, len) != 0)
504 			return(NULL);
505 		while (m->g->strip[ss] != SOP(O_BACK, i))
506 			ss++;
507 		return(backref(m, sp+len, stop, ss+1, stopst, lev));
508 		break;
509 	case OQUEST_:		/* to null or not */
510 		dp = backref(m, sp, stop, ss+1, stopst, lev);
511 		if (dp != NULL)
512 			return(dp);	/* not */
513 		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
514 		break;
515 	case OPLUS_:
516 		assert(m->lastpos != NULL);
517 		assert(lev+1 <= m->g->nplus);
518 		m->lastpos[lev+1] = sp;
519 		return(backref(m, sp, stop, ss+1, stopst, lev+1));
520 		break;
521 	case O_PLUS:
522 		if (sp == m->lastpos[lev])	/* last pass matched null */
523 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
524 		/* try another pass */
525 		m->lastpos[lev] = sp;
526 		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
527 		if (dp == NULL)
528 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
529 		else
530 			return(dp);
531 		break;
532 	case OCH_:		/* find the right one, if any */
533 		ssub = ss + 1;
534 		esub = ss + OPND(s) - 1;
535 		assert(OP(m->g->strip[esub]) == OOR1);
536 		for (;;) {	/* find first matching branch */
537 			dp = backref(m, sp, stop, ssub, esub, lev);
538 			if (dp != NULL)
539 				return(dp);
540 			/* that one missed, try next one */
541 			if (OP(m->g->strip[esub]) == O_CH)
542 				return(NULL);	/* there is none */
543 			esub++;
544 			assert(OP(m->g->strip[esub]) == OOR2);
545 			ssub = esub + 1;
546 			esub += OPND(m->g->strip[esub]);
547 			if (OP(m->g->strip[esub]) == OOR2)
548 				esub--;
549 			else
550 				assert(OP(m->g->strip[esub]) == O_CH);
551 		}
552 		break;
553 	case OLPAREN:		/* must undo assignment if rest fails */
554 		i = OPND(s);
555 		offsave = m->pmatch[i].rm_so;
556 		m->pmatch[i].rm_so = sp - m->offp;
557 		dp = backref(m, sp, stop, ss+1, stopst, lev);
558 		if (dp != NULL)
559 			return(dp);
560 		m->pmatch[i].rm_so = offsave;
561 		return(NULL);
562 		break;
563 	case ORPAREN:		/* must undo assignment if rest fails */
564 		i = OPND(s);
565 		offsave = m->pmatch[i].rm_eo;
566 		m->pmatch[i].rm_eo = sp - m->offp;
567 		dp = backref(m, sp, stop, ss+1, stopst, lev);
568 		if (dp != NULL)
569 			return(dp);
570 		m->pmatch[i].rm_eo = offsave;
571 		return(NULL);
572 		break;
573 	default:		/* uh oh */
574 		assert(0);
575 		break;
576 	}
577 
578 	/* "can't happen" */
579 	assert(0);
580 	/* NOTREACHED */
581 }
582 
583 /*
584  - fast - step through the string at top speed
585  */
586 static uchar *			/* where tentative match ended, or NULL */
587 fast(m, start, stop, startst, stopst)
588 register struct match *m;
589 uchar *start;
590 uchar *stop;
591 sopno startst;
592 sopno stopst;
593 {
594 	register states st = m->st;
595 	register states fresh = m->fresh;
596 	register states tmp = m->tmp;
597 	register uchar *p = start;
598 	register uchar c;
599 	register uchar lastc;	/* previous c */
600 	register int atbol;
601 	register int ateol;
602 	register uchar *coldp;	/* last p after which no match was underway */
603 
604 	CLEAR(st);
605 	SET1(st, startst);
606 	st = expand(m->g, startst, stopst, st, 0, 0);
607 	ASSIGN(fresh, st);
608 	SP("start", st, *p);
609 	c = '\0';
610 	coldp = NULL;
611 	for (;;) {
612 		/* next character */
613 		lastc = c;
614 		c = (p == m->endp) ? '\0' : *p;
615 		if (EQ(st, fresh))
616 			coldp = p;
617 
618 		/* is there an EOL and/or BOL between lastc and c? */
619 		atbol = ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
620 				(lastc == '\0' && !(m->eflags&REG_NOTBOL)) );
621 		ateol = ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
622 				(c == '\0' && !(m->eflags&REG_NOTEOL)) );
623 		if (atbol || ateol) {
624 			st = expand(m->g, startst, stopst, st, atbol, ateol);
625 			SP("bef", st, c);
626 		}
627 
628 		/* are we done? */
629 		if (ISSET(st, stopst) || p == stop)
630 			break;		/* NOTE BREAK OUT */
631 
632 		/* no, we must deal with this character */
633 		ASSIGN(tmp, st);
634 		ASSIGN(st, fresh);
635 		st = step(m->g, startst, stopst, tmp, c, st);
636 		SP("aft", st, c);
637 		assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st));
638 		p++;
639 	}
640 
641 	assert(coldp != NULL);
642 	m->coldp = coldp;
643 	if (ISSET(st, stopst))
644 		return(p+1);
645 	else
646 		return(NULL);
647 }
648 
649 /*
650  - slow - step through the string more deliberately
651  */
652 static uchar *			/* where it ended */
653 slow(m, start, stop, startst, stopst)
654 register struct match *m;
655 uchar *start;
656 uchar *stop;
657 sopno startst;
658 sopno stopst;
659 {
660 	register states st = m->st;
661 	register states empty = m->empty;
662 	register states tmp = m->tmp;
663 	register uchar *p = start;
664 	register uchar c = (start == m->beginp) ? '\0' : *(start-1);
665 	register uchar lastc;	/* previous c */
666 	register int atbol;
667 	register int ateol;
668 	register uchar *matchp;	/* last p at which a match ended */
669 
670 	DO("slow", start, stop, startst, stopst);
671 	CLEAR(st);
672 	SET1(st, startst);
673 	SP("sstart", st, *p);
674 	matchp = NULL;
675 	for (;;) {
676 		/* next character */
677 		lastc = c;
678 		c = (p == m->endp) ? '\0' : *p;
679 
680 		/* is there an EOL and/or BOL between lastc and c? */
681 		atbol = ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
682 				(lastc == '\0' && !(m->eflags&REG_NOTBOL)) );
683 		ateol = ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
684 				(c == '\0' && !(m->eflags&REG_NOTEOL)) );
685 
686 		/* do we need an expansion before looking at the char? */
687 		if (p == start || atbol || ateol) {
688 			st = expand(m->g, startst, stopst, st, atbol, ateol);
689 			SP("sbef", st, c);
690 		}
691 
692 		/* are we done? */
693 		if (ISSET(st, stopst))
694 			matchp = p;
695 		if (EQ(st, empty) || p == stop)
696 			break;		/* NOTE BREAK OUT */
697 
698 		/* no, we must deal with this character */
699 		ASSIGN(tmp, st);
700 		ASSIGN(st, empty);
701 		st = step(m->g, startst, stopst, tmp, c, st);
702 		SP("saft", st, c);
703 		assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st));
704 		p++;
705 	}
706 
707 	return(matchp);
708 }
709 
710 
711 /*
712  - expand - return set of states reachable from an initial set
713  */
714 static states
715 expand(g, start, stop, st, atbol, ateol)
716 register struct re_guts *g;
717 int start;			/* start state within strip */
718 int stop;			/* state after stop state within strip */
719 register states st;
720 int atbol;			/* at start or just after \n? (for BOL) */
721 int ateol;			/* just before \n or \0? (for EOL) */
722 {
723 	register sop s;
724 	register sopno pc;
725 	register onestate here;		/* note, macros know this name */
726 	register sopno look;
727 	register int i;
728 
729 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
730 		s = g->strip[pc];
731 		switch (OP(s)) {
732 		case OEND:
733 			assert(pc == stop-1);
734 			break;
735 		case OCHAR:		/* can't get past this */
736 			break;
737 		case OBOL:
738 			if (atbol)
739 				FWD(st, st, 1);
740 			break;
741 		case OEOL:
742 			if (ateol)
743 				FWD(st, st, 1);
744 			break;
745 		case OANY:		/* can't get past this either */
746 			break;
747 		case OANYOF:		/* or this */
748 			break;
749 		case OBACK_:		/* not significant here */
750 		case O_BACK:
751 			FWD(st, st, 1);
752 			break;
753 		case OPLUS_:		/* forward, this is just an empty */
754 			FWD(st, st, 1);
755 			break;
756 		case O_PLUS:		/* both forward and back */
757 			FWD(st, st, 1);
758 			i = ISSETBACK(st, OPND(s));
759 			BACK(st, st, OPND(s));
760 			if (!i && ISSETBACK(st, OPND(s))) {
761 				/* oho, must reconsider loop body */
762 				pc -= OPND(s) + 1;
763 				INIT(here, pc);
764 			}
765 			break;
766 		case OQUEST_:		/* two branches, both forward */
767 			FWD(st, st, 1);
768 			FWD(st, st, OPND(s));
769 			break;
770 		case O_QUEST:		/* just an empty */
771 			FWD(st, st, 1);
772 			break;
773 		case OLPAREN:		/* not significant here */
774 		case ORPAREN:
775 			FWD(st, st, 1);
776 			break;
777 		case OCH_:		/* mark the first two branches */
778 			FWD(st, st, 1);
779 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
780 			FWD(st, st, OPND(s));
781 			break;
782 		case OOR1:		/* done a branch, find the O_CH */
783 			if (ISSTATEIN(st, here)) {
784 				for (look = 1;
785 						OP(s = g->strip[pc+look]) != O_CH;
786 						look += OPND(s))
787 					assert(OP(s) == OOR2);
788 				FWD(st, st, look);
789 			}
790 			break;
791 		case OOR2:		/* propagate OCH_'s marking onward */
792 			FWD(st, st, 1);
793 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
794 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
795 				FWD(st, st, OPND(s));
796 			}
797 			break;
798 		case O_CH:		/* just an empty */
799 			FWD(st, st, 1);
800 			break;
801 		default:		/* ooooops... */
802 			assert(0);
803 			break;
804 		}
805 	}
806 
807 	return(st);
808 }
809 
810 /*
811  - step - map set of states reachable before char to set reachable after
812  */
813 static states
814 step(g, start, stop, bef, ch, aft)
815 register struct re_guts *g;
816 int start;			/* start state within strip */
817 int stop;			/* state after stop state within strip */
818 register states bef;		/* states reachable before */
819 uchar ch;
820 register states aft;		/* states already known reachable after */
821 {
822 	register cset *cs;
823 	register sop s;
824 	register sopno pc;
825 	register onestate here;		/* note, macros know this name */
826 	register sopno look;
827 	register int i;
828 
829 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
830 		s = g->strip[pc];
831 		switch (OP(s)) {
832 		case OEND:
833 			assert(pc == stop-1);
834 			break;
835 		case OCHAR:
836 			if (ch == OPND(s))
837 				FWD(aft, bef, 1);
838 			break;
839 		case OBOL:	/* these are looked after by expand() */
840 		case OEOL:
841 			break;
842 		case OANY:
843 			FWD(aft, bef, 1);
844 			break;
845 		case OANYOF:
846 			cs = &g->sets[OPND(s)];
847 			if (CHIN(cs, ch))
848 				FWD(aft, bef, 1);
849 			break;
850 		case OBACK_:		/* ignored here */
851 		case O_BACK:
852 			FWD(aft, aft, 1);
853 			break;
854 		case OPLUS_:		/* forward, this is just an empty */
855 			FWD(aft, aft, 1);
856 			break;
857 		case O_PLUS:		/* both forward and back */
858 			FWD(aft, aft, 1);
859 			i = ISSETBACK(aft, OPND(s));
860 			BACK(aft, aft, OPND(s));
861 			if (!i && ISSETBACK(aft, OPND(s))) {
862 				/* oho, must reconsider loop body */
863 				pc -= OPND(s) + 1;
864 				INIT(here, pc);
865 			}
866 			break;
867 		case OQUEST_:		/* two branches, both forward */
868 			FWD(aft, aft, 1);
869 			FWD(aft, aft, OPND(s));
870 			break;
871 		case O_QUEST:		/* just an empty */
872 			FWD(aft, aft, 1);
873 			break;
874 		case OLPAREN:		/* not significant here */
875 		case ORPAREN:
876 			FWD(aft, aft, 1);
877 			break;
878 		case OCH_:		/* mark the first two branches */
879 			FWD(aft, aft, 1);
880 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
881 			FWD(aft, aft, OPND(s));
882 			break;
883 		case OOR1:		/* done a branch, find the O_CH */
884 			if (ISSTATEIN(aft, here)) {
885 				for (look = 1;
886 						OP(s = g->strip[pc+look]) != O_CH;
887 						look += OPND(s))
888 					assert(OP(s) == OOR2);
889 				FWD(aft, aft, look);
890 			}
891 			break;
892 		case OOR2:		/* propagate OCH_'s marking */
893 			FWD(aft, aft, 1);
894 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
895 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
896 				FWD(aft, aft, OPND(s));
897 			}
898 			break;
899 		case O_CH:		/* just empty */
900 			FWD(aft, aft, 1);
901 			break;
902 		default:		/* ooooops... */
903 			assert(0);
904 			break;
905 		}
906 	}
907 
908 	return(aft);
909 }
910 
911 #ifndef NDEBUG
912 /*
913  - print - print a set of states
914  */
915 static void
916 print(g, caption, st, ch, d)
917 struct re_guts *g;
918 char *caption;
919 states st;
920 uchar ch;
921 FILE *d;
922 {
923 	register int i;
924 	register int first = 1;
925 
926 	fprintf(d, "%s", caption);
927 	if (ch != '\0')
928 		fprintf(d, " %s", regchar(ch));
929 	for (i = 0; i < g->nstates; i++)
930 		if (ISSET(st, i)) {
931 			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
932 			first = 0;
933 		}
934 	fprintf(d, "\n");
935 }
936 #endif
937 
938 #undef	matcher
939 #undef	fast
940 #undef	slow
941 #undef	dissect
942 #undef	backref
943 #undef	expand
944 #undef	step
945 #undef	print
946 #undef	match
947