1*f14fb602SLionel Sambuc /* $NetBSD: engine.c,v 1.24 2012/03/13 21:13:42 christos Exp $ */
22fe8fb19SBen Gras
3b7061124SArun Thomas /*-
4b7061124SArun Thomas * Copyright (c) 1992, 1993, 1994
5b7061124SArun Thomas * The Regents of the University of California. All rights reserved.
6b7061124SArun Thomas *
7b7061124SArun Thomas * This code is derived from software contributed to Berkeley by
8b7061124SArun Thomas * Henry Spencer.
9b7061124SArun Thomas *
10b7061124SArun Thomas * Redistribution and use in source and binary forms, with or without
11b7061124SArun Thomas * modification, are permitted provided that the following conditions
12b7061124SArun Thomas * are met:
13b7061124SArun Thomas * 1. Redistributions of source code must retain the above copyright
14b7061124SArun Thomas * notice, this list of conditions and the following disclaimer.
15b7061124SArun Thomas * 2. Redistributions in binary form must reproduce the above copyright
16b7061124SArun Thomas * notice, this list of conditions and the following disclaimer in the
17b7061124SArun Thomas * documentation and/or other materials provided with the distribution.
182fe8fb19SBen Gras * 3. Neither the name of the University nor the names of its contributors
192fe8fb19SBen Gras * may be used to endorse or promote products derived from this software
202fe8fb19SBen Gras * without specific prior written permission.
212fe8fb19SBen Gras *
222fe8fb19SBen Gras * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
232fe8fb19SBen Gras * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
242fe8fb19SBen Gras * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
252fe8fb19SBen Gras * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
262fe8fb19SBen Gras * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
272fe8fb19SBen Gras * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
282fe8fb19SBen Gras * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
292fe8fb19SBen Gras * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
302fe8fb19SBen Gras * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
312fe8fb19SBen Gras * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
322fe8fb19SBen Gras * SUCH DAMAGE.
332fe8fb19SBen Gras *
342fe8fb19SBen Gras * @(#)engine.c 8.5 (Berkeley) 3/20/94
352fe8fb19SBen Gras */
362fe8fb19SBen Gras
372fe8fb19SBen Gras /*-
382fe8fb19SBen Gras * Copyright (c) 1992, 1993, 1994 Henry Spencer.
392fe8fb19SBen Gras *
402fe8fb19SBen Gras * This code is derived from software contributed to Berkeley by
412fe8fb19SBen Gras * Henry Spencer.
422fe8fb19SBen Gras *
432fe8fb19SBen Gras * Redistribution and use in source and binary forms, with or without
442fe8fb19SBen Gras * modification, are permitted provided that the following conditions
452fe8fb19SBen Gras * are met:
462fe8fb19SBen Gras * 1. Redistributions of source code must retain the above copyright
472fe8fb19SBen Gras * notice, this list of conditions and the following disclaimer.
482fe8fb19SBen Gras * 2. Redistributions in binary form must reproduce the above copyright
492fe8fb19SBen Gras * notice, this list of conditions and the following disclaimer in the
502fe8fb19SBen Gras * documentation and/or other materials provided with the distribution.
51b7061124SArun Thomas * 3. All advertising materials mentioning features or use of this software
52b7061124SArun Thomas * must display the following acknowledgement:
53b7061124SArun Thomas * This product includes software developed by the University of
54b7061124SArun Thomas * California, Berkeley and its contributors.
55b7061124SArun Thomas * 4. Neither the name of the University nor the names of its contributors
56b7061124SArun Thomas * may be used to endorse or promote products derived from this software
57b7061124SArun Thomas * without specific prior written permission.
58b7061124SArun Thomas *
59b7061124SArun Thomas * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60b7061124SArun Thomas * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61b7061124SArun Thomas * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62b7061124SArun Thomas * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63b7061124SArun Thomas * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64b7061124SArun Thomas * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65b7061124SArun Thomas * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66b7061124SArun Thomas * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67b7061124SArun Thomas * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68b7061124SArun Thomas * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69b7061124SArun Thomas * SUCH DAMAGE.
70b7061124SArun Thomas *
71b7061124SArun Thomas * @(#)engine.c 8.5 (Berkeley) 3/20/94
72b7061124SArun Thomas */
73b7061124SArun Thomas
74b7061124SArun Thomas /*
75b7061124SArun Thomas * The matching engine and friends. This file is #included by regexec.c
76b7061124SArun Thomas * after suitable #defines of a variety of macros used herein, so that
77b7061124SArun Thomas * different state representations can be used without duplicating masses
78b7061124SArun Thomas * of code.
79b7061124SArun Thomas */
80b7061124SArun Thomas
81b7061124SArun Thomas #ifdef SNAMES
82b7061124SArun Thomas #define matcher smatcher
83b7061124SArun Thomas #define fast sfast
84b7061124SArun Thomas #define slow sslow
85b7061124SArun Thomas #define dissect sdissect
86b7061124SArun Thomas #define backref sbackref
87b7061124SArun Thomas #define step sstep
88b7061124SArun Thomas #define print sprint
89b7061124SArun Thomas #define at sat
90b7061124SArun Thomas #define match smat
912fe8fb19SBen Gras #define nope snope
92b7061124SArun Thomas #endif
93b7061124SArun Thomas #ifdef LNAMES
94b7061124SArun Thomas #define matcher lmatcher
95b7061124SArun Thomas #define fast lfast
96b7061124SArun Thomas #define slow lslow
97b7061124SArun Thomas #define dissect ldissect
98b7061124SArun Thomas #define backref lbackref
99b7061124SArun Thomas #define step lstep
100b7061124SArun Thomas #define print lprint
101b7061124SArun Thomas #define at lat
102b7061124SArun Thomas #define match lmat
1032fe8fb19SBen Gras #define nope lnope
104b7061124SArun Thomas #endif
105b7061124SArun Thomas
106b7061124SArun Thomas /* another structure passed up and down to avoid zillions of parameters */
107b7061124SArun Thomas struct match {
108b7061124SArun Thomas struct re_guts *g;
109b7061124SArun Thomas int eflags;
110b7061124SArun Thomas regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
1112fe8fb19SBen Gras const char *offp; /* offsets work from here */
1122fe8fb19SBen Gras const char *beginp; /* start of string -- virtual NUL precedes */
1132fe8fb19SBen Gras const char *endp; /* end of string -- virtual NUL here */
1142fe8fb19SBen Gras const char *coldp; /* can be no match starting before here */
1152fe8fb19SBen Gras const char **lastpos; /* [nplus+1] */
116b7061124SArun Thomas STATEVARS;
117b7061124SArun Thomas states st; /* current states */
118b7061124SArun Thomas states fresh; /* states for a fresh start */
119b7061124SArun Thomas states tmp; /* temporary */
120b7061124SArun Thomas states empty; /* empty set of states */
121b7061124SArun Thomas };
122b7061124SArun Thomas
123b7061124SArun Thomas /* ========= begin header generated by ./mkh ========= */
124b7061124SArun Thomas #ifdef __cplusplus
125b7061124SArun Thomas extern "C" {
126b7061124SArun Thomas #endif
127b7061124SArun Thomas
128b7061124SArun Thomas /* === engine.c === */
1292fe8fb19SBen Gras static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
1302fe8fb19SBen Gras static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
1312fe8fb19SBen Gras static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev);
1322fe8fb19SBen Gras static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
1332fe8fb19SBen Gras static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
134b7061124SArun Thomas static states step(struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft);
135b7061124SArun Thomas #define BOL (OUT+1)
136b7061124SArun Thomas #define EOL (BOL+1)
137b7061124SArun Thomas #define BOLEOL (BOL+2)
138b7061124SArun Thomas #define NOTHING (BOL+3)
139b7061124SArun Thomas #define BOW (BOL+4)
140b7061124SArun Thomas #define EOW (BOL+5)
141b7061124SArun Thomas #define CODEMAX (BOL+5) /* highest code used */
142b7061124SArun Thomas #define NONCHAR(c) ((c) > CHAR_MAX)
143b7061124SArun Thomas #define NNONCHAR (CODEMAX-CHAR_MAX)
144b7061124SArun Thomas #ifdef REDEBUG
145b7061124SArun Thomas static void print(struct match *m, char *caption, states st, int ch, FILE *d);
146b7061124SArun Thomas #endif
147b7061124SArun Thomas #ifdef REDEBUG
148b7061124SArun Thomas static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
149b7061124SArun Thomas #endif
150b7061124SArun Thomas #ifdef REDEBUG
151b7061124SArun Thomas static char *pchar(int ch);
152b7061124SArun Thomas #endif
153b7061124SArun Thomas
154b7061124SArun Thomas #ifdef __cplusplus
155b7061124SArun Thomas }
156b7061124SArun Thomas #endif
157b7061124SArun Thomas /* ========= end header generated by ./mkh ========= */
158b7061124SArun Thomas
159b7061124SArun Thomas #ifdef REDEBUG
160b7061124SArun Thomas #define SP(t, s, c) print(m, t, s, c, stdout)
161b7061124SArun Thomas #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
162b7061124SArun Thomas #define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); }
1632fe8fb19SBen Gras static int nope = 0;
164b7061124SArun Thomas #else
165b7061124SArun Thomas #define SP(t, s, c) /* nothing */
166b7061124SArun Thomas #define AT(t, p1, p2, s1, s2) /* nothing */
167b7061124SArun Thomas #define NOTE(s) /* nothing */
168b7061124SArun Thomas #endif
169b7061124SArun Thomas
170b7061124SArun Thomas /*
171b7061124SArun Thomas - matcher - the actual matching engine
1722fe8fb19SBen Gras == static int matcher(struct re_guts *g, char *string, \
173b7061124SArun Thomas == size_t nmatch, regmatch_t pmatch[], int eflags);
174b7061124SArun Thomas */
175b7061124SArun Thomas static int /* 0 success, REG_NOMATCH failure */
matcher(struct re_guts * g,const char * string,size_t nmatch,regmatch_t pmatch[],int eflags)1762fe8fb19SBen Gras matcher(
1772fe8fb19SBen Gras struct re_guts *g,
1782fe8fb19SBen Gras const char *string,
1792fe8fb19SBen Gras size_t nmatch,
1802fe8fb19SBen Gras regmatch_t pmatch[],
1812fe8fb19SBen Gras int eflags)
182b7061124SArun Thomas {
1832fe8fb19SBen Gras const char *endp;
1842fe8fb19SBen Gras size_t i;
185b7061124SArun Thomas struct match mv;
1862fe8fb19SBen Gras struct match *m = &mv;
1872fe8fb19SBen Gras const char *dp;
1882fe8fb19SBen Gras const sopno gf = g->firststate+1; /* +1 for OEND */
1892fe8fb19SBen Gras const sopno gl = g->laststate;
1902fe8fb19SBen Gras const char *start;
1912fe8fb19SBen Gras const char *stop;
1922fe8fb19SBen Gras int error = 0;
1932fe8fb19SBen Gras
1942fe8fb19SBen Gras _DIAGASSERT(g != NULL);
1952fe8fb19SBen Gras _DIAGASSERT(string != NULL);
1962fe8fb19SBen Gras /* pmatch checked below */
197b7061124SArun Thomas
198b7061124SArun Thomas /* simplify the situation where possible */
199b7061124SArun Thomas if (g->cflags®_NOSUB)
200b7061124SArun Thomas nmatch = 0;
201b7061124SArun Thomas if (eflags®_STARTEND) {
2022fe8fb19SBen Gras _DIAGASSERT(pmatch != NULL);
2032fe8fb19SBen Gras start = string + (size_t)pmatch[0].rm_so;
2042fe8fb19SBen Gras stop = string + (size_t)pmatch[0].rm_eo;
205b7061124SArun Thomas } else {
206b7061124SArun Thomas start = string;
207b7061124SArun Thomas stop = start + strlen(start);
208b7061124SArun Thomas }
209b7061124SArun Thomas if (stop < start)
210b7061124SArun Thomas return(REG_INVARG);
211b7061124SArun Thomas
212b7061124SArun Thomas /* prescreening; this does wonders for this rather slow code */
213b7061124SArun Thomas if (g->must != NULL) {
214b7061124SArun Thomas for (dp = start; dp < stop; dp++)
215*f14fb602SLionel Sambuc if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen &&
216*f14fb602SLionel Sambuc memcmp(dp, g->must, g->mlen) == 0)
217b7061124SArun Thomas break;
218b7061124SArun Thomas if (dp == stop) /* we didn't find g->must */
219b7061124SArun Thomas return(REG_NOMATCH);
220b7061124SArun Thomas }
221b7061124SArun Thomas
222b7061124SArun Thomas /* match struct setup */
223b7061124SArun Thomas m->g = g;
224b7061124SArun Thomas m->eflags = eflags;
225b7061124SArun Thomas m->pmatch = NULL;
226b7061124SArun Thomas m->lastpos = NULL;
227b7061124SArun Thomas m->offp = string;
228b7061124SArun Thomas m->beginp = start;
229b7061124SArun Thomas m->endp = stop;
230b7061124SArun Thomas STATESETUP(m, 4);
231b7061124SArun Thomas SETUP(m->st);
232b7061124SArun Thomas SETUP(m->fresh);
233b7061124SArun Thomas SETUP(m->tmp);
234b7061124SArun Thomas SETUP(m->empty);
235b7061124SArun Thomas CLEAR(m->empty);
236b7061124SArun Thomas
237b7061124SArun Thomas /* this loop does only one repetition except for backrefs */
238b7061124SArun Thomas for (;;) {
239b7061124SArun Thomas endp = fast(m, start, stop, gf, gl);
240b7061124SArun Thomas if (endp == NULL) { /* a miss */
2412fe8fb19SBen Gras error = REG_NOMATCH;
2422fe8fb19SBen Gras goto done;
243b7061124SArun Thomas }
244b7061124SArun Thomas if (nmatch == 0 && !g->backrefs)
245b7061124SArun Thomas break; /* no further info needed */
246b7061124SArun Thomas
247b7061124SArun Thomas /* where? */
248b7061124SArun Thomas assert(m->coldp != NULL);
249b7061124SArun Thomas for (;;) {
250b7061124SArun Thomas NOTE("finding start");
251b7061124SArun Thomas endp = slow(m, m->coldp, stop, gf, gl);
252b7061124SArun Thomas if (endp != NULL)
253b7061124SArun Thomas break;
254b7061124SArun Thomas assert(m->coldp < m->endp);
255b7061124SArun Thomas m->coldp++;
256b7061124SArun Thomas }
257b7061124SArun Thomas if (nmatch == 1 && !g->backrefs)
258b7061124SArun Thomas break; /* no further info needed */
259b7061124SArun Thomas
260b7061124SArun Thomas /* oh my, he wants the subexpressions... */
261b7061124SArun Thomas if (m->pmatch == NULL)
262b7061124SArun Thomas m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
263b7061124SArun Thomas sizeof(regmatch_t));
264b7061124SArun Thomas if (m->pmatch == NULL) {
2652fe8fb19SBen Gras error = REG_ESPACE;
2662fe8fb19SBen Gras goto done;
267b7061124SArun Thomas }
268b7061124SArun Thomas for (i = 1; i <= m->g->nsub; i++)
2692fe8fb19SBen Gras m->pmatch[i].rm_so = m->pmatch[i].rm_eo = (regoff_t)-1;
270b7061124SArun Thomas if (!g->backrefs && !(m->eflags®_BACKR)) {
271b7061124SArun Thomas NOTE("dissecting");
272b7061124SArun Thomas dp = dissect(m, m->coldp, endp, gf, gl);
273b7061124SArun Thomas } else {
274b7061124SArun Thomas if (g->nplus > 0 && m->lastpos == NULL)
2752fe8fb19SBen Gras m->lastpos = malloc((g->nplus+1) *
2762fe8fb19SBen Gras sizeof(const char *));
277b7061124SArun Thomas if (g->nplus > 0 && m->lastpos == NULL) {
2782fe8fb19SBen Gras error = REG_ESPACE;
2792fe8fb19SBen Gras goto done;
280b7061124SArun Thomas }
281b7061124SArun Thomas NOTE("backref dissect");
282b7061124SArun Thomas dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
283b7061124SArun Thomas }
284b7061124SArun Thomas if (dp != NULL)
285b7061124SArun Thomas break;
286b7061124SArun Thomas
287b7061124SArun Thomas /* uh-oh... we couldn't find a subexpression-level match */
288b7061124SArun Thomas assert(g->backrefs); /* must be back references doing it */
289b7061124SArun Thomas assert(g->nplus == 0 || m->lastpos != NULL);
290b7061124SArun Thomas for (;;) {
291b7061124SArun Thomas if (dp != NULL || endp <= m->coldp)
292b7061124SArun Thomas break; /* defeat */
293b7061124SArun Thomas NOTE("backoff");
294b7061124SArun Thomas endp = slow(m, m->coldp, endp-1, gf, gl);
295b7061124SArun Thomas if (endp == NULL)
296b7061124SArun Thomas break; /* defeat */
297b7061124SArun Thomas /* try it on a shorter possibility */
298b7061124SArun Thomas #ifndef NDEBUG
299b7061124SArun Thomas for (i = 1; i <= m->g->nsub; i++) {
3002fe8fb19SBen Gras assert(m->pmatch[i].rm_so == (regoff_t)-1);
3012fe8fb19SBen Gras assert(m->pmatch[i].rm_eo == (regoff_t)-1);
302b7061124SArun Thomas }
303b7061124SArun Thomas #endif
304b7061124SArun Thomas NOTE("backoff dissect");
305b7061124SArun Thomas dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
306b7061124SArun Thomas }
307b7061124SArun Thomas assert(dp == NULL || dp == endp);
308b7061124SArun Thomas if (dp != NULL) /* found a shorter one */
309b7061124SArun Thomas break;
310b7061124SArun Thomas
311b7061124SArun Thomas /* despite initial appearances, there is no match here */
312b7061124SArun Thomas NOTE("false alarm");
313b7061124SArun Thomas start = m->coldp + 1; /* recycle starting later */
314b7061124SArun Thomas assert(start <= stop);
315b7061124SArun Thomas }
316b7061124SArun Thomas
317b7061124SArun Thomas /* fill in the details if requested */
318b7061124SArun Thomas if (nmatch > 0) {
3192fe8fb19SBen Gras _DIAGASSERT(pmatch != NULL);
320b7061124SArun Thomas pmatch[0].rm_so = m->coldp - m->offp;
321b7061124SArun Thomas pmatch[0].rm_eo = endp - m->offp;
322b7061124SArun Thomas }
323b7061124SArun Thomas if (nmatch > 1) {
324b7061124SArun Thomas assert(m->pmatch != NULL);
325b7061124SArun Thomas for (i = 1; i < nmatch; i++)
326b7061124SArun Thomas if (i <= m->g->nsub)
327b7061124SArun Thomas pmatch[i] = m->pmatch[i];
328b7061124SArun Thomas else {
3292fe8fb19SBen Gras pmatch[i].rm_so = (regoff_t)-1;
3302fe8fb19SBen Gras pmatch[i].rm_eo = (regoff_t)-1;
331b7061124SArun Thomas }
332b7061124SArun Thomas }
333b7061124SArun Thomas
3342fe8fb19SBen Gras done:
3352fe8fb19SBen Gras if (m->pmatch != NULL) {
3362fe8fb19SBen Gras free(m->pmatch);
3372fe8fb19SBen Gras m->pmatch = NULL;
3382fe8fb19SBen Gras }
3392fe8fb19SBen Gras if (m->lastpos != NULL) {
3402fe8fb19SBen Gras free(m->lastpos);
3412fe8fb19SBen Gras m->lastpos = NULL;
3422fe8fb19SBen Gras }
343b7061124SArun Thomas STATETEARDOWN(m);
3442fe8fb19SBen Gras return error;
345b7061124SArun Thomas }
346b7061124SArun Thomas
347b7061124SArun Thomas /*
348b7061124SArun Thomas - dissect - figure out what matched what, no back references
3492fe8fb19SBen Gras == static const char *dissect(struct match *m, const char *start, \
3502fe8fb19SBen Gras == const char *stop, sopno startst, sopno stopst);
351b7061124SArun Thomas */
3522fe8fb19SBen Gras static const char * /* == stop (success) always */
dissect(struct match * m,const char * start,const char * stop,sopno startst,sopno stopst)3532fe8fb19SBen Gras dissect(
3542fe8fb19SBen Gras struct match *m,
3552fe8fb19SBen Gras const char *start,
3562fe8fb19SBen Gras const char *stop,
3572fe8fb19SBen Gras sopno startst,
3582fe8fb19SBen Gras sopno stopst)
359b7061124SArun Thomas {
3602fe8fb19SBen Gras int i;
3612fe8fb19SBen Gras sopno ss; /* start sop of current subRE */
3622fe8fb19SBen Gras sopno es; /* end sop of current subRE */
3632fe8fb19SBen Gras const char *sp; /* start of string matched by it */
3642fe8fb19SBen Gras const char *stp; /* string matched by it cannot pass here */
3652fe8fb19SBen Gras const char *rest; /* start of rest of string */
3662fe8fb19SBen Gras const char *tail; /* string unmatched by rest of RE */
3672fe8fb19SBen Gras sopno ssub; /* start sop of subsubRE */
3682fe8fb19SBen Gras sopno esub; /* end sop of subsubRE */
3692fe8fb19SBen Gras const char *ssp; /* start of string matched by subsubRE */
3702fe8fb19SBen Gras const char *sep; /* end of string matched by subsubRE */
3712fe8fb19SBen Gras const char *oldssp; /* previous ssp */
3722fe8fb19SBen Gras #ifndef NDEBUG
3732fe8fb19SBen Gras const char *dp;
3742fe8fb19SBen Gras #endif
3752fe8fb19SBen Gras
3762fe8fb19SBen Gras _DIAGASSERT(m != NULL);
3772fe8fb19SBen Gras _DIAGASSERT(start != NULL);
3782fe8fb19SBen Gras _DIAGASSERT(stop != NULL);
379b7061124SArun Thomas
380b7061124SArun Thomas AT("diss", start, stop, startst, stopst);
381b7061124SArun Thomas sp = start;
382b7061124SArun Thomas for (ss = startst; ss < stopst; ss = es) {
383b7061124SArun Thomas /* identify end of subRE */
384b7061124SArun Thomas es = ss;
385b7061124SArun Thomas switch (OP(m->g->strip[es])) {
386b7061124SArun Thomas case OPLUS_:
387b7061124SArun Thomas case OQUEST_:
388b7061124SArun Thomas es += OPND(m->g->strip[es]);
389b7061124SArun Thomas break;
390b7061124SArun Thomas case OCH_:
391b7061124SArun Thomas while (OP(m->g->strip[es]) != O_CH)
392b7061124SArun Thomas es += OPND(m->g->strip[es]);
393b7061124SArun Thomas break;
394b7061124SArun Thomas }
395b7061124SArun Thomas es++;
396b7061124SArun Thomas
397b7061124SArun Thomas /* figure out what it matched */
398b7061124SArun Thomas switch (OP(m->g->strip[ss])) {
399b7061124SArun Thomas case OEND:
400b7061124SArun Thomas assert(nope);
401b7061124SArun Thomas break;
402b7061124SArun Thomas case OCHAR:
403b7061124SArun Thomas sp++;
404b7061124SArun Thomas break;
405b7061124SArun Thomas case OBOL:
406b7061124SArun Thomas case OEOL:
407b7061124SArun Thomas case OBOW:
408b7061124SArun Thomas case OEOW:
409b7061124SArun Thomas break;
410b7061124SArun Thomas case OANY:
411b7061124SArun Thomas case OANYOF:
412b7061124SArun Thomas sp++;
413b7061124SArun Thomas break;
414b7061124SArun Thomas case OBACK_:
415b7061124SArun Thomas case O_BACK:
416b7061124SArun Thomas assert(nope);
417b7061124SArun Thomas break;
418b7061124SArun Thomas /* cases where length of match is hard to find */
419b7061124SArun Thomas case OQUEST_:
420b7061124SArun Thomas stp = stop;
421b7061124SArun Thomas for (;;) {
422b7061124SArun Thomas /* how long could this one be? */
423b7061124SArun Thomas rest = slow(m, sp, stp, ss, es);
424b7061124SArun Thomas assert(rest != NULL); /* it did match */
425b7061124SArun Thomas /* could the rest match the rest? */
426b7061124SArun Thomas tail = slow(m, rest, stop, es, stopst);
427b7061124SArun Thomas if (tail == stop)
428b7061124SArun Thomas break; /* yes! */
429b7061124SArun Thomas /* no -- try a shorter match for this one */
430b7061124SArun Thomas stp = rest - 1;
431b7061124SArun Thomas assert(stp >= sp); /* it did work */
432b7061124SArun Thomas }
433b7061124SArun Thomas ssub = ss + 1;
434b7061124SArun Thomas esub = es - 1;
435b7061124SArun Thomas /* did innards match? */
436b7061124SArun Thomas if (slow(m, sp, rest, ssub, esub) != NULL) {
4372fe8fb19SBen Gras #ifdef NDEBUG
4382fe8fb19SBen Gras (void)
4392fe8fb19SBen Gras #else
4402fe8fb19SBen Gras dp =
4412fe8fb19SBen Gras #endif
4422fe8fb19SBen Gras dissect(m, sp, rest, ssub, esub);
443b7061124SArun Thomas assert(dp == rest);
444b7061124SArun Thomas } else /* no */
445b7061124SArun Thomas assert(sp == rest);
446b7061124SArun Thomas sp = rest;
447b7061124SArun Thomas break;
448b7061124SArun Thomas case OPLUS_:
449b7061124SArun Thomas stp = stop;
450b7061124SArun Thomas for (;;) {
451b7061124SArun Thomas /* how long could this one be? */
452b7061124SArun Thomas rest = slow(m, sp, stp, ss, es);
453b7061124SArun Thomas assert(rest != NULL); /* it did match */
454b7061124SArun Thomas /* could the rest match the rest? */
455b7061124SArun Thomas tail = slow(m, rest, stop, es, stopst);
456b7061124SArun Thomas if (tail == stop)
457b7061124SArun Thomas break; /* yes! */
458b7061124SArun Thomas /* no -- try a shorter match for this one */
459b7061124SArun Thomas stp = rest - 1;
460b7061124SArun Thomas assert(stp >= sp); /* it did work */
461b7061124SArun Thomas }
462b7061124SArun Thomas ssub = ss + 1;
463b7061124SArun Thomas esub = es - 1;
464b7061124SArun Thomas ssp = sp;
465b7061124SArun Thomas oldssp = ssp;
466b7061124SArun Thomas for (;;) { /* find last match of innards */
467b7061124SArun Thomas sep = slow(m, ssp, rest, ssub, esub);
468b7061124SArun Thomas if (sep == NULL || sep == ssp)
469b7061124SArun Thomas break; /* failed or matched null */
470b7061124SArun Thomas oldssp = ssp; /* on to next try */
471b7061124SArun Thomas ssp = sep;
472b7061124SArun Thomas }
473b7061124SArun Thomas if (sep == NULL) {
474b7061124SArun Thomas /* last successful match */
475b7061124SArun Thomas sep = ssp;
476b7061124SArun Thomas ssp = oldssp;
477b7061124SArun Thomas }
478b7061124SArun Thomas assert(sep == rest); /* must exhaust substring */
479b7061124SArun Thomas assert(slow(m, ssp, sep, ssub, esub) == rest);
4802fe8fb19SBen Gras #ifdef NDEBUG
4812fe8fb19SBen Gras (void)
4822fe8fb19SBen Gras #else
4832fe8fb19SBen Gras dp =
4842fe8fb19SBen Gras #endif
4852fe8fb19SBen Gras dissect(m, ssp, sep, ssub, esub);
486b7061124SArun Thomas assert(dp == sep);
487b7061124SArun Thomas sp = rest;
488b7061124SArun Thomas break;
489b7061124SArun Thomas case OCH_:
490b7061124SArun Thomas stp = stop;
491b7061124SArun Thomas for (;;) {
492b7061124SArun Thomas /* how long could this one be? */
493b7061124SArun Thomas rest = slow(m, sp, stp, ss, es);
494b7061124SArun Thomas assert(rest != NULL); /* it did match */
495b7061124SArun Thomas /* could the rest match the rest? */
496b7061124SArun Thomas tail = slow(m, rest, stop, es, stopst);
497b7061124SArun Thomas if (tail == stop)
498b7061124SArun Thomas break; /* yes! */
499b7061124SArun Thomas /* no -- try a shorter match for this one */
500b7061124SArun Thomas stp = rest - 1;
501b7061124SArun Thomas assert(stp >= sp); /* it did work */
502b7061124SArun Thomas }
503b7061124SArun Thomas ssub = ss + 1;
504b7061124SArun Thomas esub = ss + OPND(m->g->strip[ss]) - 1;
505b7061124SArun Thomas assert(OP(m->g->strip[esub]) == OOR1);
506b7061124SArun Thomas for (;;) { /* find first matching branch */
507b7061124SArun Thomas if (slow(m, sp, rest, ssub, esub) == rest)
508b7061124SArun Thomas break; /* it matched all of it */
509b7061124SArun Thomas /* that one missed, try next one */
510b7061124SArun Thomas assert(OP(m->g->strip[esub]) == OOR1);
511b7061124SArun Thomas esub++;
512b7061124SArun Thomas assert(OP(m->g->strip[esub]) == OOR2);
513b7061124SArun Thomas ssub = esub + 1;
514b7061124SArun Thomas esub += OPND(m->g->strip[esub]);
515b7061124SArun Thomas if (OP(m->g->strip[esub]) == OOR2)
516b7061124SArun Thomas esub--;
517b7061124SArun Thomas else
518b7061124SArun Thomas assert(OP(m->g->strip[esub]) == O_CH);
519b7061124SArun Thomas }
5202fe8fb19SBen Gras #ifdef NDEBUG
5212fe8fb19SBen Gras (void)
5222fe8fb19SBen Gras #else
5232fe8fb19SBen Gras dp =
5242fe8fb19SBen Gras #endif
5252fe8fb19SBen Gras dissect(m, sp, rest, ssub, esub);
526b7061124SArun Thomas assert(dp == rest);
527b7061124SArun Thomas sp = rest;
528b7061124SArun Thomas break;
529b7061124SArun Thomas case O_PLUS:
530b7061124SArun Thomas case O_QUEST:
531b7061124SArun Thomas case OOR1:
532b7061124SArun Thomas case OOR2:
533b7061124SArun Thomas case O_CH:
534b7061124SArun Thomas assert(nope);
535b7061124SArun Thomas break;
536b7061124SArun Thomas case OLPAREN:
537b7061124SArun Thomas i = OPND(m->g->strip[ss]);
538b7061124SArun Thomas assert(0 < i && i <= m->g->nsub);
539b7061124SArun Thomas m->pmatch[i].rm_so = sp - m->offp;
540b7061124SArun Thomas break;
541b7061124SArun Thomas case ORPAREN:
542b7061124SArun Thomas i = OPND(m->g->strip[ss]);
543b7061124SArun Thomas assert(0 < i && i <= m->g->nsub);
544b7061124SArun Thomas m->pmatch[i].rm_eo = sp - m->offp;
545b7061124SArun Thomas break;
546b7061124SArun Thomas default: /* uh oh */
547b7061124SArun Thomas assert(nope);
548b7061124SArun Thomas break;
549b7061124SArun Thomas }
550b7061124SArun Thomas }
551b7061124SArun Thomas
552b7061124SArun Thomas assert(sp == stop);
553b7061124SArun Thomas return(sp);
554b7061124SArun Thomas }
555b7061124SArun Thomas
556b7061124SArun Thomas /*
557b7061124SArun Thomas - backref - figure out what matched what, figuring in back references
5582fe8fb19SBen Gras == static const char *backref(struct match *m, const char *start, \
5592fe8fb19SBen Gras == const char *stop, sopno startst, sopno stopst, sopno lev);
560b7061124SArun Thomas */
5612fe8fb19SBen Gras static const char * /* == stop (success) or NULL (failure) */
backref(struct match * m,const char * start,const char * stop,sopno startst,sopno stopst,sopno lev)5622fe8fb19SBen Gras backref(
5632fe8fb19SBen Gras struct match *m,
5642fe8fb19SBen Gras const char *start,
5652fe8fb19SBen Gras const char *stop,
5662fe8fb19SBen Gras sopno startst,
5672fe8fb19SBen Gras sopno stopst,
5682fe8fb19SBen Gras sopno lev) /* PLUS nesting level */
569b7061124SArun Thomas {
5702fe8fb19SBen Gras int i;
5712fe8fb19SBen Gras sopno ss; /* start sop of current subRE */
5722fe8fb19SBen Gras const char *sp; /* start of string matched by it */
5732fe8fb19SBen Gras sopno ssub; /* start sop of subsubRE */
5742fe8fb19SBen Gras sopno esub; /* end sop of subsubRE */
5752fe8fb19SBen Gras const char *ssp; /* start of string matched by subsubRE */
5762fe8fb19SBen Gras const char *dp;
5772fe8fb19SBen Gras size_t len;
5782fe8fb19SBen Gras int hard;
5792fe8fb19SBen Gras sop s;
5802fe8fb19SBen Gras regoff_t offsave;
5812fe8fb19SBen Gras cset *cs;
5822fe8fb19SBen Gras
5832fe8fb19SBen Gras _DIAGASSERT(m != NULL);
5842fe8fb19SBen Gras _DIAGASSERT(start != NULL);
5852fe8fb19SBen Gras _DIAGASSERT(stop != NULL);
586b7061124SArun Thomas
587b7061124SArun Thomas AT("back", start, stop, startst, stopst);
588b7061124SArun Thomas sp = start;
589b7061124SArun Thomas
590b7061124SArun Thomas /* get as far as we can with easy stuff */
591b7061124SArun Thomas hard = 0;
592b7061124SArun Thomas for (ss = startst; !hard && ss < stopst; ss++)
593b7061124SArun Thomas switch (OP(s = m->g->strip[ss])) {
594b7061124SArun Thomas case OCHAR:
595b7061124SArun Thomas if (sp == stop || *sp++ != (char)OPND(s))
596b7061124SArun Thomas return(NULL);
597b7061124SArun Thomas break;
598b7061124SArun Thomas case OANY:
599b7061124SArun Thomas if (sp == stop)
600b7061124SArun Thomas return(NULL);
601b7061124SArun Thomas sp++;
602b7061124SArun Thomas break;
603b7061124SArun Thomas case OANYOF:
604b7061124SArun Thomas cs = &m->g->sets[OPND(s)];
605b7061124SArun Thomas if (sp == stop || !CHIN(cs, *sp++))
606b7061124SArun Thomas return(NULL);
607b7061124SArun Thomas break;
608b7061124SArun Thomas case OBOL:
609b7061124SArun Thomas if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) ||
610b7061124SArun Thomas (sp < m->endp && *(sp-1) == '\n' &&
611b7061124SArun Thomas (m->g->cflags®_NEWLINE)) )
612b7061124SArun Thomas { /* yes */ }
613b7061124SArun Thomas else
614b7061124SArun Thomas return(NULL);
615b7061124SArun Thomas break;
616b7061124SArun Thomas case OEOL:
617b7061124SArun Thomas if ( (sp == m->endp && !(m->eflags®_NOTEOL)) ||
618b7061124SArun Thomas (sp < m->endp && *sp == '\n' &&
619b7061124SArun Thomas (m->g->cflags®_NEWLINE)) )
620b7061124SArun Thomas { /* yes */ }
621b7061124SArun Thomas else
622b7061124SArun Thomas return(NULL);
623b7061124SArun Thomas break;
624b7061124SArun Thomas case OBOW:
625b7061124SArun Thomas if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) ||
626b7061124SArun Thomas (sp < m->endp && *(sp-1) == '\n' &&
627b7061124SArun Thomas (m->g->cflags®_NEWLINE)) ||
628b7061124SArun Thomas (sp > m->beginp &&
629b7061124SArun Thomas !ISWORD(*(sp-1))) ) &&
630b7061124SArun Thomas (sp < m->endp && ISWORD(*sp)) )
631b7061124SArun Thomas { /* yes */ }
632b7061124SArun Thomas else
633b7061124SArun Thomas return(NULL);
634b7061124SArun Thomas break;
635b7061124SArun Thomas case OEOW:
636b7061124SArun Thomas if (( (sp == m->endp && !(m->eflags®_NOTEOL)) ||
637b7061124SArun Thomas (sp < m->endp && *sp == '\n' &&
638b7061124SArun Thomas (m->g->cflags®_NEWLINE)) ||
639b7061124SArun Thomas (sp < m->endp && !ISWORD(*sp)) ) &&
640b7061124SArun Thomas (sp > m->beginp && ISWORD(*(sp-1))) )
641b7061124SArun Thomas { /* yes */ }
642b7061124SArun Thomas else
643b7061124SArun Thomas return(NULL);
644b7061124SArun Thomas break;
645b7061124SArun Thomas case O_QUEST:
646b7061124SArun Thomas break;
647b7061124SArun Thomas case OOR1: /* matches null but needs to skip */
648b7061124SArun Thomas ss++;
649b7061124SArun Thomas s = m->g->strip[ss];
650b7061124SArun Thomas do {
651b7061124SArun Thomas assert(OP(s) == OOR2);
652b7061124SArun Thomas ss += OPND(s);
653b7061124SArun Thomas } while (OP(s = m->g->strip[ss]) != O_CH);
654b7061124SArun Thomas /* note that the ss++ gets us past the O_CH */
655b7061124SArun Thomas break;
656b7061124SArun Thomas default: /* have to make a choice */
657b7061124SArun Thomas hard = 1;
658b7061124SArun Thomas break;
659b7061124SArun Thomas }
660b7061124SArun Thomas if (!hard) { /* that was it! */
661b7061124SArun Thomas if (sp != stop)
662b7061124SArun Thomas return(NULL);
663b7061124SArun Thomas return(sp);
664b7061124SArun Thomas }
665b7061124SArun Thomas ss--; /* adjust for the for's final increment */
666b7061124SArun Thomas
667b7061124SArun Thomas /* the hard stuff */
668b7061124SArun Thomas AT("hard", sp, stop, ss, stopst);
669b7061124SArun Thomas s = m->g->strip[ss];
670b7061124SArun Thomas switch (OP(s)) {
671b7061124SArun Thomas case OBACK_: /* the vilest depths */
672b7061124SArun Thomas i = OPND(s);
673b7061124SArun Thomas assert(0 < i && i <= m->g->nsub);
6742fe8fb19SBen Gras if (m->pmatch[i].rm_eo == (regoff_t)-1)
675b7061124SArun Thomas return(NULL);
6762fe8fb19SBen Gras assert(m->pmatch[i].rm_so != (regoff_t)-1);
6772fe8fb19SBen Gras len = (size_t)(m->pmatch[i].rm_eo - m->pmatch[i].rm_so);
6782fe8fb19SBen Gras if (len == 0)
6792fe8fb19SBen Gras return(NULL);
680b7061124SArun Thomas assert(stop - m->beginp >= len);
681b7061124SArun Thomas if (sp > stop - len)
682b7061124SArun Thomas return(NULL); /* not enough left to match */
6832fe8fb19SBen Gras ssp = m->offp + (size_t)m->pmatch[i].rm_so;
684b7061124SArun Thomas if (memcmp(sp, ssp, len) != 0)
685b7061124SArun Thomas return(NULL);
686b7061124SArun Thomas while (m->g->strip[ss] != SOP(O_BACK, i))
687b7061124SArun Thomas ss++;
688b7061124SArun Thomas return(backref(m, sp+len, stop, ss+1, stopst, lev));
6892fe8fb19SBen Gras
690b7061124SArun Thomas case OQUEST_: /* to null or not */
691b7061124SArun Thomas dp = backref(m, sp, stop, ss+1, stopst, lev);
692b7061124SArun Thomas if (dp != NULL)
693b7061124SArun Thomas return(dp); /* not */
694b7061124SArun Thomas return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
6952fe8fb19SBen Gras
696b7061124SArun Thomas case OPLUS_:
697b7061124SArun Thomas assert(m->lastpos != NULL);
698b7061124SArun Thomas assert(lev+1 <= m->g->nplus);
699b7061124SArun Thomas m->lastpos[lev+1] = sp;
700b7061124SArun Thomas return(backref(m, sp, stop, ss+1, stopst, lev+1));
7012fe8fb19SBen Gras
702b7061124SArun Thomas case O_PLUS:
703b7061124SArun Thomas if (sp == m->lastpos[lev]) /* last pass matched null */
704b7061124SArun Thomas return(backref(m, sp, stop, ss+1, stopst, lev-1));
705b7061124SArun Thomas /* try another pass */
706b7061124SArun Thomas m->lastpos[lev] = sp;
707b7061124SArun Thomas dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
708b7061124SArun Thomas if (dp == NULL)
7092fe8fb19SBen Gras dp = backref(m, sp, stop, ss+1, stopst, lev-1);
710b7061124SArun Thomas return(dp);
7112fe8fb19SBen Gras
712b7061124SArun Thomas case OCH_: /* find the right one, if any */
713b7061124SArun Thomas ssub = ss + 1;
714b7061124SArun Thomas esub = ss + OPND(s) - 1;
715b7061124SArun Thomas assert(OP(m->g->strip[esub]) == OOR1);
716b7061124SArun Thomas for (;;) { /* find first matching branch */
717b7061124SArun Thomas dp = backref(m, sp, stop, ssub, esub, lev);
718b7061124SArun Thomas if (dp != NULL)
719b7061124SArun Thomas return(dp);
720b7061124SArun Thomas /* that one missed, try next one */
721b7061124SArun Thomas if (OP(m->g->strip[esub]) == O_CH)
722b7061124SArun Thomas return(NULL); /* there is none */
723b7061124SArun Thomas esub++;
724b7061124SArun Thomas assert(OP(m->g->strip[esub]) == OOR2);
725b7061124SArun Thomas ssub = esub + 1;
726b7061124SArun Thomas esub += OPND(m->g->strip[esub]);
727b7061124SArun Thomas if (OP(m->g->strip[esub]) == OOR2)
728b7061124SArun Thomas esub--;
729b7061124SArun Thomas else
730b7061124SArun Thomas assert(OP(m->g->strip[esub]) == O_CH);
731b7061124SArun Thomas }
7322fe8fb19SBen Gras
733b7061124SArun Thomas case OLPAREN: /* must undo assignment if rest fails */
734b7061124SArun Thomas i = OPND(s);
735b7061124SArun Thomas assert(0 < i && i <= m->g->nsub);
736b7061124SArun Thomas offsave = m->pmatch[i].rm_so;
737b7061124SArun Thomas m->pmatch[i].rm_so = sp - m->offp;
738b7061124SArun Thomas dp = backref(m, sp, stop, ss+1, stopst, lev);
739b7061124SArun Thomas if (dp != NULL)
740b7061124SArun Thomas return(dp);
741b7061124SArun Thomas m->pmatch[i].rm_so = offsave;
742b7061124SArun Thomas return(NULL);
7432fe8fb19SBen Gras
744b7061124SArun Thomas case ORPAREN: /* must undo assignment if rest fails */
745b7061124SArun Thomas i = OPND(s);
746b7061124SArun Thomas assert(0 < i && i <= m->g->nsub);
747b7061124SArun Thomas offsave = m->pmatch[i].rm_eo;
748b7061124SArun Thomas m->pmatch[i].rm_eo = sp - m->offp;
749b7061124SArun Thomas dp = backref(m, sp, stop, ss+1, stopst, lev);
750b7061124SArun Thomas if (dp != NULL)
751b7061124SArun Thomas return(dp);
752b7061124SArun Thomas m->pmatch[i].rm_eo = offsave;
753b7061124SArun Thomas return(NULL);
7542fe8fb19SBen Gras
755b7061124SArun Thomas default: /* uh oh */
756b7061124SArun Thomas assert(nope);
757b7061124SArun Thomas break;
758b7061124SArun Thomas }
759b7061124SArun Thomas
760b7061124SArun Thomas /* "can't happen" */
761b7061124SArun Thomas assert(nope);
762b7061124SArun Thomas /* NOTREACHED */
7632fe8fb19SBen Gras return NULL;
764b7061124SArun Thomas }
765b7061124SArun Thomas
766b7061124SArun Thomas /*
767b7061124SArun Thomas - fast - step through the string at top speed
7682fe8fb19SBen Gras == static const char *fast(struct match *m, const char *start, \
7692fe8fb19SBen Gras == const char *stop, sopno startst, sopno stopst);
770b7061124SArun Thomas */
7712fe8fb19SBen Gras static const char * /* where tentative match ended, or NULL */
fast(struct match * m,const char * start,const char * stop,sopno startst,sopno stopst)7722fe8fb19SBen Gras fast(
7732fe8fb19SBen Gras struct match *m,
7742fe8fb19SBen Gras const char *start,
7752fe8fb19SBen Gras const char *stop,
7762fe8fb19SBen Gras sopno startst,
7772fe8fb19SBen Gras sopno stopst)
778b7061124SArun Thomas {
7792fe8fb19SBen Gras states st = m->st;
7802fe8fb19SBen Gras states fresh = m->fresh;
7812fe8fb19SBen Gras states tmp = m->tmp;
7822fe8fb19SBen Gras const char *p = start;
7832fe8fb19SBen Gras int c = (start == m->beginp) ? OUT : *(start-1);
7842fe8fb19SBen Gras int lastc; /* previous c */
7852fe8fb19SBen Gras int flagch;
786*f14fb602SLionel Sambuc size_t i;
7872fe8fb19SBen Gras const char *coldp; /* last p after which no match was underway */
7882fe8fb19SBen Gras
7892fe8fb19SBen Gras _DIAGASSERT(m != NULL);
7902fe8fb19SBen Gras _DIAGASSERT(start != NULL);
7912fe8fb19SBen Gras _DIAGASSERT(stop != NULL);
792b7061124SArun Thomas
793b7061124SArun Thomas CLEAR(st);
794b7061124SArun Thomas SET1(st, startst);
795b7061124SArun Thomas st = step(m->g, startst, stopst, st, NOTHING, st);
796b7061124SArun Thomas ASSIGN(fresh, st);
797b7061124SArun Thomas SP("start", st, *p);
798b7061124SArun Thomas coldp = NULL;
799b7061124SArun Thomas for (;;) {
800b7061124SArun Thomas /* next character */
801b7061124SArun Thomas lastc = c;
802b7061124SArun Thomas c = (p == m->endp) ? OUT : *p;
803b7061124SArun Thomas if (EQ(st, fresh))
804b7061124SArun Thomas coldp = p;
805b7061124SArun Thomas
806b7061124SArun Thomas /* is there an EOL and/or BOL between lastc and c? */
807b7061124SArun Thomas flagch = '\0';
808b7061124SArun Thomas i = 0;
809b7061124SArun Thomas if ( (lastc == '\n' && m->g->cflags®_NEWLINE) ||
810b7061124SArun Thomas (lastc == OUT && !(m->eflags®_NOTBOL)) ) {
811b7061124SArun Thomas flagch = BOL;
812b7061124SArun Thomas i = m->g->nbol;
813b7061124SArun Thomas }
814b7061124SArun Thomas if ( (c == '\n' && m->g->cflags®_NEWLINE) ||
815b7061124SArun Thomas (c == OUT && !(m->eflags®_NOTEOL)) ) {
816b7061124SArun Thomas flagch = (flagch == BOL) ? BOLEOL : EOL;
817b7061124SArun Thomas i += m->g->neol;
818b7061124SArun Thomas }
819b7061124SArun Thomas if (i != 0) {
820b7061124SArun Thomas for (; i > 0; i--)
821b7061124SArun Thomas st = step(m->g, startst, stopst, st, flagch, st);
822b7061124SArun Thomas SP("boleol", st, c);
823b7061124SArun Thomas }
824b7061124SArun Thomas
825b7061124SArun Thomas /* how about a word boundary? */
826b7061124SArun Thomas if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
827b7061124SArun Thomas (c != OUT && ISWORD(c)) ) {
828b7061124SArun Thomas flagch = BOW;
829b7061124SArun Thomas }
830b7061124SArun Thomas if ( (lastc != OUT && ISWORD(lastc)) &&
831b7061124SArun Thomas (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
832b7061124SArun Thomas flagch = EOW;
833b7061124SArun Thomas }
834b7061124SArun Thomas if (flagch == BOW || flagch == EOW) {
835b7061124SArun Thomas st = step(m->g, startst, stopst, st, flagch, st);
836b7061124SArun Thomas SP("boweow", st, c);
837b7061124SArun Thomas }
838b7061124SArun Thomas
839b7061124SArun Thomas /* are we done? */
840b7061124SArun Thomas if (ISSET(st, stopst) || p == stop)
841b7061124SArun Thomas break; /* NOTE BREAK OUT */
842b7061124SArun Thomas
843b7061124SArun Thomas /* no, we must deal with this character */
844b7061124SArun Thomas ASSIGN(tmp, st);
845b7061124SArun Thomas ASSIGN(st, fresh);
846b7061124SArun Thomas assert(c != OUT);
847b7061124SArun Thomas st = step(m->g, startst, stopst, tmp, c, st);
848b7061124SArun Thomas SP("aft", st, c);
849b7061124SArun Thomas assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
850b7061124SArun Thomas p++;
851b7061124SArun Thomas }
852b7061124SArun Thomas
853b7061124SArun Thomas assert(coldp != NULL);
854b7061124SArun Thomas m->coldp = coldp;
855b7061124SArun Thomas if (ISSET(st, stopst))
856b7061124SArun Thomas return(p+1);
857b7061124SArun Thomas else
858b7061124SArun Thomas return(NULL);
859b7061124SArun Thomas }
860b7061124SArun Thomas
861b7061124SArun Thomas /*
862b7061124SArun Thomas - slow - step through the string more deliberately
8632fe8fb19SBen Gras == static const char *slow(struct match *m, const char *start, \
8642fe8fb19SBen Gras == const char *stop, sopno startst, sopno stopst);
865b7061124SArun Thomas */
8662fe8fb19SBen Gras static const char * /* where it ended */
slow(struct match * m,const char * start,const char * stop,sopno startst,sopno stopst)8672fe8fb19SBen Gras slow(
8682fe8fb19SBen Gras struct match *m,
8692fe8fb19SBen Gras const char *start,
8702fe8fb19SBen Gras const char *stop,
8712fe8fb19SBen Gras sopno startst,
8722fe8fb19SBen Gras sopno stopst)
873b7061124SArun Thomas {
8742fe8fb19SBen Gras states st = m->st;
8752fe8fb19SBen Gras states empty = m->empty;
8762fe8fb19SBen Gras states tmp = m->tmp;
8772fe8fb19SBen Gras const char *p = start;
8782fe8fb19SBen Gras int c = (start == m->beginp) ? OUT : *(start-1);
8792fe8fb19SBen Gras int lastc; /* previous c */
8802fe8fb19SBen Gras int flagch;
881*f14fb602SLionel Sambuc size_t i;
8822fe8fb19SBen Gras const char *matchp; /* last p at which a match ended */
8832fe8fb19SBen Gras
8842fe8fb19SBen Gras _DIAGASSERT(m != NULL);
8852fe8fb19SBen Gras _DIAGASSERT(start != NULL);
8862fe8fb19SBen Gras _DIAGASSERT(stop != NULL);
887b7061124SArun Thomas
888b7061124SArun Thomas AT("slow", start, stop, startst, stopst);
889b7061124SArun Thomas CLEAR(st);
890b7061124SArun Thomas SET1(st, startst);
891b7061124SArun Thomas SP("sstart", st, *p);
892b7061124SArun Thomas st = step(m->g, startst, stopst, st, NOTHING, st);
893b7061124SArun Thomas matchp = NULL;
894b7061124SArun Thomas for (;;) {
895b7061124SArun Thomas /* next character */
896b7061124SArun Thomas lastc = c;
897b7061124SArun Thomas c = (p == m->endp) ? OUT : *p;
898b7061124SArun Thomas
899b7061124SArun Thomas /* is there an EOL and/or BOL between lastc and c? */
900b7061124SArun Thomas flagch = '\0';
901b7061124SArun Thomas i = 0;
902b7061124SArun Thomas if ( (lastc == '\n' && m->g->cflags®_NEWLINE) ||
903b7061124SArun Thomas (lastc == OUT && !(m->eflags®_NOTBOL)) ) {
904b7061124SArun Thomas flagch = BOL;
905b7061124SArun Thomas i = m->g->nbol;
906b7061124SArun Thomas }
907b7061124SArun Thomas if ( (c == '\n' && m->g->cflags®_NEWLINE) ||
908b7061124SArun Thomas (c == OUT && !(m->eflags®_NOTEOL)) ) {
909b7061124SArun Thomas flagch = (flagch == BOL) ? BOLEOL : EOL;
910b7061124SArun Thomas i += m->g->neol;
911b7061124SArun Thomas }
912b7061124SArun Thomas if (i != 0) {
913b7061124SArun Thomas for (; i > 0; i--)
914b7061124SArun Thomas st = step(m->g, startst, stopst, st, flagch, st);
915b7061124SArun Thomas SP("sboleol", st, c);
916b7061124SArun Thomas }
917b7061124SArun Thomas
918b7061124SArun Thomas /* how about a word boundary? */
919b7061124SArun Thomas if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
920b7061124SArun Thomas (c != OUT && ISWORD(c)) ) {
921b7061124SArun Thomas flagch = BOW;
922b7061124SArun Thomas }
923b7061124SArun Thomas if ( (lastc != OUT && ISWORD(lastc)) &&
924b7061124SArun Thomas (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
925b7061124SArun Thomas flagch = EOW;
926b7061124SArun Thomas }
927b7061124SArun Thomas if (flagch == BOW || flagch == EOW) {
928b7061124SArun Thomas st = step(m->g, startst, stopst, st, flagch, st);
929b7061124SArun Thomas SP("sboweow", st, c);
930b7061124SArun Thomas }
931b7061124SArun Thomas
932b7061124SArun Thomas /* are we done? */
933b7061124SArun Thomas if (ISSET(st, stopst))
934b7061124SArun Thomas matchp = p;
935b7061124SArun Thomas if (EQ(st, empty) || p == stop)
936b7061124SArun Thomas break; /* NOTE BREAK OUT */
937b7061124SArun Thomas
938b7061124SArun Thomas /* no, we must deal with this character */
939b7061124SArun Thomas ASSIGN(tmp, st);
940b7061124SArun Thomas ASSIGN(st, empty);
941b7061124SArun Thomas assert(c != OUT);
942b7061124SArun Thomas st = step(m->g, startst, stopst, tmp, c, st);
943b7061124SArun Thomas SP("saft", st, c);
944b7061124SArun Thomas assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
945b7061124SArun Thomas p++;
946b7061124SArun Thomas }
947b7061124SArun Thomas
948b7061124SArun Thomas return(matchp);
949b7061124SArun Thomas }
950b7061124SArun Thomas
951b7061124SArun Thomas
952b7061124SArun Thomas /*
953b7061124SArun Thomas - step - map set of states reachable before char to set reachable after
9542fe8fb19SBen Gras == static states step(struct re_guts *g, sopno start, sopno stop, \
9552fe8fb19SBen Gras == states bef, int ch, states aft);
956b7061124SArun Thomas == #define BOL (OUT+1)
957b7061124SArun Thomas == #define EOL (BOL+1)
958b7061124SArun Thomas == #define BOLEOL (BOL+2)
959b7061124SArun Thomas == #define NOTHING (BOL+3)
960b7061124SArun Thomas == #define BOW (BOL+4)
961b7061124SArun Thomas == #define EOW (BOL+5)
962b7061124SArun Thomas == #define CODEMAX (BOL+5) // highest code used
963b7061124SArun Thomas == #define NONCHAR(c) ((c) > CHAR_MAX)
964b7061124SArun Thomas == #define NNONCHAR (CODEMAX-CHAR_MAX)
965b7061124SArun Thomas */
966b7061124SArun Thomas static states
step(struct re_guts * g,sopno start,sopno stop,states bef,int ch,states aft)9672fe8fb19SBen Gras step(
9682fe8fb19SBen Gras struct re_guts *g,
9692fe8fb19SBen Gras sopno start, /* start state within strip */
9702fe8fb19SBen Gras sopno stop, /* state after stop state within strip */
9712fe8fb19SBen Gras states bef, /* states reachable before */
9722fe8fb19SBen Gras int ch, /* character or NONCHAR code */
9732fe8fb19SBen Gras states aft) /* states already known reachable after */
974b7061124SArun Thomas {
9752fe8fb19SBen Gras cset *cs;
9762fe8fb19SBen Gras sop s;
9772fe8fb19SBen Gras sopno pc;
9782fe8fb19SBen Gras onestate here; /* note, macros know this name */
9792fe8fb19SBen Gras sopno look;
9802fe8fb19SBen Gras int i;
9812fe8fb19SBen Gras
9822fe8fb19SBen Gras _DIAGASSERT(g != NULL);
983b7061124SArun Thomas
984b7061124SArun Thomas for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
985b7061124SArun Thomas s = g->strip[pc];
986b7061124SArun Thomas switch (OP(s)) {
987b7061124SArun Thomas case OEND:
988b7061124SArun Thomas assert(pc == stop-1);
989b7061124SArun Thomas break;
990b7061124SArun Thomas case OCHAR:
991b7061124SArun Thomas /* only characters can match */
992b7061124SArun Thomas assert(!NONCHAR(ch) || ch != (char)OPND(s));
993b7061124SArun Thomas if (ch == (char)OPND(s))
994b7061124SArun Thomas FWD(aft, bef, 1);
995b7061124SArun Thomas break;
996b7061124SArun Thomas case OBOL:
997b7061124SArun Thomas if (ch == BOL || ch == BOLEOL)
998b7061124SArun Thomas FWD(aft, bef, 1);
999b7061124SArun Thomas break;
1000b7061124SArun Thomas case OEOL:
1001b7061124SArun Thomas if (ch == EOL || ch == BOLEOL)
1002b7061124SArun Thomas FWD(aft, bef, 1);
1003b7061124SArun Thomas break;
1004b7061124SArun Thomas case OBOW:
1005b7061124SArun Thomas if (ch == BOW)
1006b7061124SArun Thomas FWD(aft, bef, 1);
1007b7061124SArun Thomas break;
1008b7061124SArun Thomas case OEOW:
1009b7061124SArun Thomas if (ch == EOW)
1010b7061124SArun Thomas FWD(aft, bef, 1);
1011b7061124SArun Thomas break;
1012b7061124SArun Thomas case OANY:
1013b7061124SArun Thomas if (!NONCHAR(ch))
1014b7061124SArun Thomas FWD(aft, bef, 1);
1015b7061124SArun Thomas break;
1016b7061124SArun Thomas case OANYOF:
1017b7061124SArun Thomas cs = &g->sets[OPND(s)];
1018b7061124SArun Thomas if (!NONCHAR(ch) && CHIN(cs, ch))
1019b7061124SArun Thomas FWD(aft, bef, 1);
1020b7061124SArun Thomas break;
1021b7061124SArun Thomas case OBACK_: /* ignored here */
1022b7061124SArun Thomas case O_BACK:
1023b7061124SArun Thomas FWD(aft, aft, 1);
1024b7061124SArun Thomas break;
1025b7061124SArun Thomas case OPLUS_: /* forward, this is just an empty */
1026b7061124SArun Thomas FWD(aft, aft, 1);
1027b7061124SArun Thomas break;
1028b7061124SArun Thomas case O_PLUS: /* both forward and back */
1029b7061124SArun Thomas FWD(aft, aft, 1);
1030b7061124SArun Thomas i = ISSETBACK(aft, OPND(s));
1031b7061124SArun Thomas BACK(aft, aft, OPND(s));
1032b7061124SArun Thomas if (!i && ISSETBACK(aft, OPND(s))) {
1033b7061124SArun Thomas /* oho, must reconsider loop body */
1034b7061124SArun Thomas pc -= OPND(s) + 1;
1035b7061124SArun Thomas INIT(here, pc);
1036b7061124SArun Thomas }
1037b7061124SArun Thomas break;
1038b7061124SArun Thomas case OQUEST_: /* two branches, both forward */
1039b7061124SArun Thomas FWD(aft, aft, 1);
1040b7061124SArun Thomas FWD(aft, aft, OPND(s));
1041b7061124SArun Thomas break;
1042b7061124SArun Thomas case O_QUEST: /* just an empty */
1043b7061124SArun Thomas FWD(aft, aft, 1);
1044b7061124SArun Thomas break;
1045b7061124SArun Thomas case OLPAREN: /* not significant here */
1046b7061124SArun Thomas case ORPAREN:
1047b7061124SArun Thomas FWD(aft, aft, 1);
1048b7061124SArun Thomas break;
1049b7061124SArun Thomas case OCH_: /* mark the first two branches */
1050b7061124SArun Thomas FWD(aft, aft, 1);
1051b7061124SArun Thomas assert(OP(g->strip[pc+OPND(s)]) == OOR2);
1052b7061124SArun Thomas FWD(aft, aft, OPND(s));
1053b7061124SArun Thomas break;
1054b7061124SArun Thomas case OOR1: /* done a branch, find the O_CH */
1055b7061124SArun Thomas if (ISSTATEIN(aft, here)) {
1056b7061124SArun Thomas for (look = 1;
1057b7061124SArun Thomas OP(s = g->strip[pc+look]) != O_CH;
1058b7061124SArun Thomas look += OPND(s))
1059b7061124SArun Thomas assert(OP(s) == OOR2);
1060b7061124SArun Thomas FWD(aft, aft, look);
1061b7061124SArun Thomas }
1062b7061124SArun Thomas break;
1063b7061124SArun Thomas case OOR2: /* propagate OCH_'s marking */
1064b7061124SArun Thomas FWD(aft, aft, 1);
1065b7061124SArun Thomas if (OP(g->strip[pc+OPND(s)]) != O_CH) {
1066b7061124SArun Thomas assert(OP(g->strip[pc+OPND(s)]) == OOR2);
1067b7061124SArun Thomas FWD(aft, aft, OPND(s));
1068b7061124SArun Thomas }
1069b7061124SArun Thomas break;
1070b7061124SArun Thomas case O_CH: /* just empty */
1071b7061124SArun Thomas FWD(aft, aft, 1);
1072b7061124SArun Thomas break;
1073b7061124SArun Thomas default: /* ooooops... */
1074b7061124SArun Thomas assert(nope);
1075b7061124SArun Thomas break;
1076b7061124SArun Thomas }
1077b7061124SArun Thomas }
1078b7061124SArun Thomas
1079b7061124SArun Thomas return(aft);
1080b7061124SArun Thomas }
1081b7061124SArun Thomas
1082b7061124SArun Thomas #ifdef REDEBUG
1083b7061124SArun Thomas /*
1084b7061124SArun Thomas - print - print a set of states
1085b7061124SArun Thomas == #ifdef REDEBUG
1086b7061124SArun Thomas == static void print(struct match *m, char *caption, states st, \
1087b7061124SArun Thomas == int ch, FILE *d);
1088b7061124SArun Thomas == #endif
1089b7061124SArun Thomas */
1090b7061124SArun Thomas static void
print(struct match * m,char * caption,states st,int ch,FILE * d)10912fe8fb19SBen Gras print(
10922fe8fb19SBen Gras struct match *m,
10932fe8fb19SBen Gras char *caption,
10942fe8fb19SBen Gras states st,
10952fe8fb19SBen Gras int ch,
10962fe8fb19SBen Gras FILE *d)
1097b7061124SArun Thomas {
10982fe8fb19SBen Gras struct re_guts *g = m->g;
10992fe8fb19SBen Gras int i;
11002fe8fb19SBen Gras int first = 1;
11012fe8fb19SBen Gras
11022fe8fb19SBen Gras _DIAGASSERT(m != NULL);
11032fe8fb19SBen Gras _DIAGASSERT(caption != NULL);
1104b7061124SArun Thomas
1105b7061124SArun Thomas if (!(m->eflags®_TRACE))
1106b7061124SArun Thomas return;
1107b7061124SArun Thomas
11082fe8fb19SBen Gras _DIAGASSERT(d != NULL);
11092fe8fb19SBen Gras
1110b7061124SArun Thomas fprintf(d, "%s", caption);
1111b7061124SArun Thomas if (ch != '\0')
1112b7061124SArun Thomas fprintf(d, " %s", pchar(ch));
1113b7061124SArun Thomas for (i = 0; i < g->nstates; i++)
1114b7061124SArun Thomas if (ISSET(st, i)) {
1115b7061124SArun Thomas fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
1116b7061124SArun Thomas first = 0;
1117b7061124SArun Thomas }
1118b7061124SArun Thomas fprintf(d, "\n");
1119b7061124SArun Thomas }
1120b7061124SArun Thomas
1121b7061124SArun Thomas /*
1122b7061124SArun Thomas - at - print current situation
1123b7061124SArun Thomas == #ifdef REDEBUG
1124b7061124SArun Thomas == static void at(struct match *m, char *title, char *start, char *stop, \
1125b7061124SArun Thomas == sopno startst, sopno stopst);
1126b7061124SArun Thomas == #endif
1127b7061124SArun Thomas */
1128b7061124SArun Thomas static void
at(struct match * m,char * title,char * start,char * stop,sopno startst,sopno stopst)11292fe8fb19SBen Gras at(
11302fe8fb19SBen Gras struct match *m,
11312fe8fb19SBen Gras char *title,
11322fe8fb19SBen Gras char *start,
11332fe8fb19SBen Gras char *stop,
11342fe8fb19SBen Gras sopno startst,
11352fe8fb19SBen Gras sopno stopst)
1136b7061124SArun Thomas {
11372fe8fb19SBen Gras
11382fe8fb19SBen Gras _DIAGASSERT(m != NULL);
11392fe8fb19SBen Gras _DIAGASSERT(title != NULL);
11402fe8fb19SBen Gras _DIAGASSERT(start != NULL);
11412fe8fb19SBen Gras _DIAGASSERT(stop != NULL);
11422fe8fb19SBen Gras
1143b7061124SArun Thomas if (!(m->eflags®_TRACE))
1144b7061124SArun Thomas return;
1145b7061124SArun Thomas
1146b7061124SArun Thomas printf("%s %s-", title, pchar(*start));
1147b7061124SArun Thomas printf("%s ", pchar(*stop));
1148b7061124SArun Thomas printf("%ld-%ld\n", (long)startst, (long)stopst);
1149b7061124SArun Thomas }
1150b7061124SArun Thomas
1151b7061124SArun Thomas #ifndef PCHARDONE
1152b7061124SArun Thomas #define PCHARDONE /* never again */
1153b7061124SArun Thomas /*
1154b7061124SArun Thomas - pchar - make a character printable
1155b7061124SArun Thomas == #ifdef REDEBUG
1156b7061124SArun Thomas == static char *pchar(int ch);
1157b7061124SArun Thomas == #endif
1158b7061124SArun Thomas *
1159b7061124SArun Thomas * Is this identical to regchar() over in debug.c? Well, yes. But a
1160b7061124SArun Thomas * duplicate here avoids having a debugging-capable regexec.o tied to
1161b7061124SArun Thomas * a matching debug.o, and this is convenient. It all disappears in
1162b7061124SArun Thomas * the non-debug compilation anyway, so it doesn't matter much.
1163b7061124SArun Thomas */
1164b7061124SArun Thomas static char * /* -> representation */
pchar(int ch)11652fe8fb19SBen Gras pchar(
11662fe8fb19SBen Gras int ch)
1167b7061124SArun Thomas {
1168b7061124SArun Thomas static char pbuf[10];
1169b7061124SArun Thomas
1170b7061124SArun Thomas if (isprint(ch) || ch == ' ')
11712fe8fb19SBen Gras (void)snprintf(pbuf, sizeof pbuf, "%c", ch);
1172b7061124SArun Thomas else
11732fe8fb19SBen Gras (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
1174b7061124SArun Thomas return(pbuf);
1175b7061124SArun Thomas }
1176b7061124SArun Thomas #endif
1177b7061124SArun Thomas #endif
1178b7061124SArun Thomas
1179b7061124SArun Thomas #undef matcher
1180b7061124SArun Thomas #undef fast
1181b7061124SArun Thomas #undef slow
1182b7061124SArun Thomas #undef dissect
1183b7061124SArun Thomas #undef backref
1184b7061124SArun Thomas #undef step
1185b7061124SArun Thomas #undef print
1186b7061124SArun Thomas #undef at
1187b7061124SArun Thomas #undef match
11882fe8fb19SBen Gras #undef nope
1189