155845Sbostic /*- 255845Sbostic * Copyright (c) 1992 Henry Spencer. 355845Sbostic * Copyright (c) 1992 The Regents of the University of California. 455845Sbostic * All rights reserved. 555845Sbostic * 655845Sbostic * This code is derived from software contributed to Berkeley by 755845Sbostic * Henry Spencer of the University of Toronto. 855845Sbostic * 955845Sbostic * %sccs.include.redist.c% 1055845Sbostic * 11*55882Sbostic * @(#)engine.c 5.2 (Berkeley) 08/10/92 1255845Sbostic */ 1355845Sbostic 1455845Sbostic /* 1555845Sbostic * The matching engine and friends. This file is #included by regexec.c 1655845Sbostic * after suitable #defines of a variety of macros used herein, so that 1755845Sbostic * different state representations can be used without duplicating masses 1855845Sbostic * of code. 1955845Sbostic */ 2055845Sbostic 2155845Sbostic #ifdef SNAMES 2255845Sbostic #define matcher smatcher 2355845Sbostic #define fast sfast 2455845Sbostic #define slow sslow 2555845Sbostic #define dissect sdissect 2655845Sbostic #define backref sbackref 2755845Sbostic #define expand sexpand 2855845Sbostic #define step sstep 2955845Sbostic #define print sprint 3055845Sbostic #define match smat 3155845Sbostic #endif 3255845Sbostic #ifdef LNAMES 3355845Sbostic #define matcher lmatcher 3455845Sbostic #define fast lfast 3555845Sbostic #define slow lslow 3655845Sbostic #define dissect ldissect 3755845Sbostic #define backref lbackref 3855845Sbostic #define expand lexpand 3955845Sbostic #define step lstep 4055845Sbostic #define print lprint 4155845Sbostic #define match lmat 4255845Sbostic #endif 4355845Sbostic 4455845Sbostic /* another structure passed up and down to avoid zillions of parameters */ 4555845Sbostic struct match { 4655845Sbostic struct re_guts *g; 4755845Sbostic int eflags; 4855845Sbostic regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 4955845Sbostic uchar *offp; /* offsets work from here */ 5055845Sbostic uchar *beginp; /* start of string -- virtual NUL precedes */ 5155845Sbostic uchar *endp; /* end of string -- virtual NUL here */ 5255845Sbostic uchar *coldp; /* can be no match starting before here */ 5355845Sbostic uchar **lastpos; /* [nplus+1] */ 5455845Sbostic STATEVARS; 5555845Sbostic states st; /* current states */ 5655845Sbostic states fresh; /* states for a fresh start */ 5755845Sbostic states tmp; /* temporary */ 5855845Sbostic states empty; /* empty set of states */ 5955845Sbostic }; 6055845Sbostic 6155845Sbostic #ifndef NDEBUG 6255845Sbostic STATIC void print(); 6355845Sbostic extern char *regchar(); 6455845Sbostic #define SP(t, s, c) { if (m->eflags®_TRACE) print(m->g, t, s, c, stdout); } 6555845Sbostic #define DO(t, p1, p2, s1, s2) { if (m->eflags®_TRACE) { \ 6655845Sbostic printf("%s %s-", t, regchar(*(p1))); \ 6755845Sbostic printf("%s ", regchar(*(p2))); \ 6855845Sbostic printf("%ld-%ld\n", s1, s2); } } 6955845Sbostic #else 7055845Sbostic #define SP(t, s, c) /* nothing */ 7155845Sbostic #define DO(t, p1, p2, s1, s2) /* nothing */ 7255845Sbostic #endif 7355845Sbostic 7455845Sbostic STATIC uchar *fast(); 7555845Sbostic STATIC uchar *slow(); 7655845Sbostic STATIC uchar *dissect(); 7755845Sbostic STATIC uchar *backref(); 7855845Sbostic STATIC states expand(); 7955845Sbostic STATIC states step(); 8055845Sbostic 8155845Sbostic /* 8255845Sbostic - matcher - the actual matching engine 8355845Sbostic */ 8455845Sbostic static int /* 0 success, REG_NOMATCH failure */ 8555845Sbostic matcher(g, string, nmatch, pmatch, eflags) 8655845Sbostic register struct re_guts *g; 8755845Sbostic uchar *string; 8855845Sbostic size_t nmatch; 8955845Sbostic regmatch_t pmatch[]; 9055845Sbostic int eflags; 9155845Sbostic { 9255845Sbostic register uchar *endp; 9355845Sbostic register int i; 9455845Sbostic struct match mv; 9555845Sbostic register struct match *m = &mv; 9655845Sbostic register uchar *dp; 9755845Sbostic const register sopno gf = g->firststate+1; /* +1 for OEND */ 9855845Sbostic const register sopno gl = g->laststate; 9955845Sbostic uchar *start; 10055845Sbostic uchar *stop; 10155845Sbostic 10255845Sbostic if (g->cflags®_NOSUB) 10355845Sbostic nmatch = 0; /* simplify tests */ 10455845Sbostic if (eflags®_STARTEND) { 10555845Sbostic start = string + pmatch[0].rm_so; 10655845Sbostic stop = string + pmatch[0].rm_eo; 10755845Sbostic } else { 10855845Sbostic start = string; 10955845Sbostic stop = start + strlen((char *)start); 11055845Sbostic } 11155845Sbostic 112*55882Sbostic /* prescreening; this does wonders for this rather slow code */ 113*55882Sbostic if (g->must != NULL) { 114*55882Sbostic for (dp = start; dp < stop; dp++) 115*55882Sbostic if (*dp == (uchar)g->must[0] && stop - dp >= g->mlen && 116*55882Sbostic strncmp((char *)dp, g->must, (size_t)g->mlen) == 0) 117*55882Sbostic break; 118*55882Sbostic if (dp == stop) /* we didn't find g->must */ 119*55882Sbostic return(REG_NOMATCH); 120*55882Sbostic } 121*55882Sbostic 12255845Sbostic /* match struct setup */ 12355845Sbostic m->g = g; 12455845Sbostic m->eflags = eflags; 12555845Sbostic m->pmatch = NULL; 12655845Sbostic m->lastpos = NULL; 12755845Sbostic m->offp = string; 12855845Sbostic m->beginp = start; 12955845Sbostic m->endp = stop; 13055845Sbostic STATESETUP(m, 4); 13155845Sbostic SETUP(m->st); 13255845Sbostic SETUP(m->fresh); 13355845Sbostic SETUP(m->tmp); 13455845Sbostic SETUP(m->empty); 13555845Sbostic CLEAR(m->empty); 13655845Sbostic 13755845Sbostic /* this loop does only one repetition except for backrefs */ 13855845Sbostic for (;;) { 13955845Sbostic endp = fast(m, start, stop, gf, gl); 14055845Sbostic if (endp == NULL) { /* a miss */ 14155845Sbostic STATETEARDOWN(m); 14255845Sbostic return(REG_NOMATCH); 14355845Sbostic } 14455845Sbostic if (nmatch == 0 && !g->backrefs) 14555845Sbostic break; /* no further info needed */ 14655845Sbostic 14755845Sbostic /* where? */ 14855845Sbostic assert(m->coldp != NULL); 14955845Sbostic for (;;) { 15055845Sbostic endp = slow(m, m->coldp, stop, gf, gl); 15155845Sbostic if (endp != NULL) 15255845Sbostic break; 15355845Sbostic assert(*m->coldp != '\0'); 15455845Sbostic m->coldp++; 15555845Sbostic } 15655845Sbostic if (nmatch == 1 && !g->backrefs) 15755845Sbostic break; /* no further info needed */ 15855845Sbostic 15955845Sbostic /* oh my, he wants the subexpressions... */ 16055845Sbostic if (m->pmatch == NULL) 16155845Sbostic m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 16255845Sbostic sizeof(regmatch_t)); 16355845Sbostic if (m->pmatch == NULL) { 16455845Sbostic STATETEARDOWN(m); 16555845Sbostic return(REG_ESPACE); 16655845Sbostic } 16755845Sbostic for (i = 1; i <= m->g->nsub; i++) { 16855845Sbostic m->pmatch[i].rm_so = -1; 16955845Sbostic m->pmatch[i].rm_eo = -1; 17055845Sbostic } 17155845Sbostic if (!g->backrefs) 17255845Sbostic dp = dissect(m, m->coldp, endp, gf, gl); 17355845Sbostic else { 17455845Sbostic if (g->nplus > 0 && m->lastpos == NULL) 17555845Sbostic m->lastpos = (uchar **)malloc((g->nplus+1) * 17655845Sbostic sizeof(uchar *)); 17755845Sbostic if (g->nplus > 0 && m->lastpos == NULL) { 17855845Sbostic free(m->pmatch); 17955845Sbostic STATETEARDOWN(m); 18055845Sbostic return(REG_ESPACE); 18155845Sbostic } 18255845Sbostic dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 18355845Sbostic } 18455845Sbostic if (dp != NULL) 18555845Sbostic break; 18655845Sbostic 18755845Sbostic /* uh-oh... we couldn't find a subexpression-level match */ 18855845Sbostic assert(g->backrefs); /* must be back references doing it */ 18955845Sbostic assert(g->nplus == 0 || m->lastpos != NULL); 19055845Sbostic while (dp == NULL && endp > m->coldp && 19155845Sbostic (endp = slow(m, m->coldp, endp-1, gf, gl)) != NULL) { 19255845Sbostic /* try it on a shorter possibility */ 19355845Sbostic for (i = 1; i <= m->g->nsub; i++) { 19455845Sbostic m->pmatch[i].rm_so = -1; 19555845Sbostic m->pmatch[i].rm_eo = -1; 19655845Sbostic } 19755845Sbostic dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 19855845Sbostic } 19955845Sbostic assert(dp == NULL || dp == endp); 20055845Sbostic if (dp != NULL) /* found a shorter one */ 20155845Sbostic break; 20255845Sbostic 20355845Sbostic /* despite initial appearances, there is no match here */ 20455845Sbostic start = m->coldp + 1; /* recycle starting later */ 20555845Sbostic assert(start <= stop); 20655845Sbostic } 20755845Sbostic 20855845Sbostic /* fill in the details if requested */ 20955845Sbostic if (nmatch > 0) { 21055845Sbostic pmatch[0].rm_so = m->coldp - m->offp; 21155845Sbostic pmatch[0].rm_eo = endp - m->offp; 21255845Sbostic } 21355845Sbostic if (nmatch > 1) { 21455845Sbostic assert(m->pmatch != NULL); 21555845Sbostic for (i = 1; i < nmatch; i++) 21655845Sbostic if (i <= m->g->nsub) 21755845Sbostic pmatch[i] = m->pmatch[i]; 21855845Sbostic else { 21955845Sbostic pmatch[i].rm_so = -1; 22055845Sbostic pmatch[i].rm_eo = -1; 22155845Sbostic } 22255845Sbostic } 22355845Sbostic 22455845Sbostic if (m->pmatch != NULL) 22555845Sbostic free((char *)m->pmatch); 22655845Sbostic if (m->lastpos != NULL) 22755845Sbostic free((char *)m->lastpos); 22855845Sbostic STATETEARDOWN(m); 22955845Sbostic return(0); 23055845Sbostic } 23155845Sbostic 23255845Sbostic /* 23355845Sbostic - dissect - figure out what matched what, no back references 23455845Sbostic */ 23555845Sbostic static uchar * /* == stop (success) always */ 23655845Sbostic dissect(m, start, stop, startst, stopst) 23755845Sbostic register struct match *m; 23855845Sbostic uchar *start; 23955845Sbostic uchar *stop; 24055845Sbostic sopno startst; 24155845Sbostic sopno stopst; 24255845Sbostic { 24355845Sbostic register int i; 24455845Sbostic register sopno ss; /* start sop of current subRE */ 24555845Sbostic register sopno es; /* end sop of current subRE */ 24655845Sbostic register uchar *sp; /* start of string matched by it */ 24755845Sbostic register uchar *stp; /* string matched by it cannot pass here */ 24855845Sbostic register uchar *rest; /* start of rest of string */ 24955845Sbostic register uchar *tail; /* string unmatched by rest of RE */ 25055845Sbostic register sopno ssub; /* start sop of subsubRE */ 25155845Sbostic register sopno esub; /* end sop of subsubRE */ 25255845Sbostic register uchar *ssp; /* start of string matched by subsubRE */ 25355845Sbostic register uchar *sep; /* end of string matched by subsubRE */ 25455845Sbostic register uchar *oldssp; /* previous ssp */ 25555845Sbostic register uchar *dp; 25655845Sbostic register size_t len; 25755845Sbostic 25855845Sbostic DO("diss", start, stop, startst, stopst); 25955845Sbostic sp = start; 26055845Sbostic for (ss = startst; ss < stopst; ss = es) { 26155845Sbostic /* identify end of subRE */ 26255845Sbostic es = ss; 26355845Sbostic switch (OP(m->g->strip[es])) { 26455845Sbostic case OPLUS_: 26555845Sbostic case OQUEST_: 26655845Sbostic es += OPND(m->g->strip[es]); 26755845Sbostic break; 26855845Sbostic case OCH_: 26955845Sbostic while (OP(m->g->strip[es]) != O_CH) 27055845Sbostic es += OPND(m->g->strip[es]); 27155845Sbostic break; 27255845Sbostic } 27355845Sbostic es++; 27455845Sbostic 27555845Sbostic /* figure out what it matched */ 27655845Sbostic switch (OP(m->g->strip[ss])) { 27755845Sbostic case OEND: 27855845Sbostic assert(0); 27955845Sbostic break; 28055845Sbostic case OCHAR: 28155845Sbostic sp++; 28255845Sbostic break; 28355845Sbostic case OBOL: 28455845Sbostic case OEOL: 28555845Sbostic break; 28655845Sbostic case OANY: 28755845Sbostic case OANYOF: 28855845Sbostic sp++; 28955845Sbostic break; 29055845Sbostic case OBACK_: 29155845Sbostic case O_BACK: 29255845Sbostic assert(0); 29355845Sbostic break; 29455845Sbostic /* cases where length of match is hard to find */ 29555845Sbostic case OQUEST_: 29655845Sbostic stp = stop; 29755845Sbostic for (;;) { 29855845Sbostic /* how long could this one be? */ 29955845Sbostic rest = slow(m, sp, stp, ss, es); 30055845Sbostic assert(rest != NULL); /* it did match */ 30155845Sbostic /* could the rest match the rest? */ 30255845Sbostic tail = slow(m, rest, stop, es, stopst); 30355845Sbostic if (tail == stop) 30455845Sbostic break; /* yes! */ 30555845Sbostic /* no -- try a shorter match for this one */ 30655845Sbostic stp = rest - 1; 30755845Sbostic assert(stp >= sp); /* it did work */ 30855845Sbostic } 30955845Sbostic ssub = ss + 1; 31055845Sbostic esub = es - 1; 31155845Sbostic /* did innards match? */ 31255845Sbostic if (slow(m, sp, rest, ssub, esub) != NULL) { 31355845Sbostic dp = dissect(m, sp, rest, ssub, esub); 31455845Sbostic assert(dp == rest); 31555845Sbostic } else /* no */ 31655845Sbostic assert(sp == rest); 31755845Sbostic sp = rest; 31855845Sbostic break; 31955845Sbostic case OPLUS_: 32055845Sbostic stp = stop; 32155845Sbostic for (;;) { 32255845Sbostic /* how long could this one be? */ 32355845Sbostic rest = slow(m, sp, stp, ss, es); 32455845Sbostic assert(rest != NULL); /* it did match */ 32555845Sbostic /* could the rest match the rest? */ 32655845Sbostic tail = slow(m, rest, stop, es, stopst); 32755845Sbostic if (tail == stop) 32855845Sbostic break; /* yes! */ 32955845Sbostic /* no -- try a shorter match for this one */ 33055845Sbostic stp = rest - 1; 33155845Sbostic assert(stp >= sp); /* it did work */ 33255845Sbostic } 33355845Sbostic ssub = ss + 1; 33455845Sbostic esub = es - 1; 33555845Sbostic ssp = sp; 33655845Sbostic oldssp = ssp; 33755845Sbostic for (;;) { /* find last match of innards */ 33855845Sbostic sep = slow(m, ssp, rest, ssub, esub); 33955845Sbostic if (sep == NULL || sep == ssp) 34055845Sbostic break; /* failed or matched null */ 34155845Sbostic oldssp = ssp; /* on to next try */ 34255845Sbostic ssp = sep; 34355845Sbostic } 34455845Sbostic if (sep == NULL) { 34555845Sbostic /* last successful match */ 34655845Sbostic sep = ssp; 34755845Sbostic ssp = oldssp; 34855845Sbostic } 34955845Sbostic assert(sep == rest); /* must exhaust substring */ 35055845Sbostic assert(slow(m, ssp, sep, ssub, esub) == rest); 35155845Sbostic dp = dissect(m, ssp, sep, ssub, esub); 35255845Sbostic assert(dp == sep); 35355845Sbostic sp = rest; 35455845Sbostic break; 35555845Sbostic case OCH_: 35655845Sbostic stp = stop; 35755845Sbostic for (;;) { 35855845Sbostic /* how long could this one be? */ 35955845Sbostic rest = slow(m, sp, stp, ss, es); 36055845Sbostic assert(rest != NULL); /* it did match */ 36155845Sbostic /* could the rest match the rest? */ 36255845Sbostic tail = slow(m, rest, stop, es, stopst); 36355845Sbostic if (tail == stop) 36455845Sbostic break; /* yes! */ 36555845Sbostic /* no -- try a shorter match for this one */ 36655845Sbostic stp = rest - 1; 36755845Sbostic assert(stp >= sp); /* it did work */ 36855845Sbostic } 36955845Sbostic ssub = ss + 1; 37055845Sbostic esub = ss + OPND(m->g->strip[ss]) - 1; 37155845Sbostic assert(OP(m->g->strip[esub]) == OOR1); 37255845Sbostic for (;;) { /* find first matching branch */ 37355845Sbostic if (slow(m, sp, rest, ssub, esub) == rest) 37455845Sbostic break; /* it matched all of it */ 37555845Sbostic /* that one missed, try next one */ 37655845Sbostic assert(OP(m->g->strip[esub]) == OOR1); 37755845Sbostic esub++; 37855845Sbostic assert(OP(m->g->strip[esub]) == OOR2); 37955845Sbostic ssub = esub + 1; 38055845Sbostic esub += OPND(m->g->strip[esub]); 38155845Sbostic if (OP(m->g->strip[esub]) == OOR2) 38255845Sbostic esub--; 38355845Sbostic else 38455845Sbostic assert(OP(m->g->strip[esub]) == O_CH); 38555845Sbostic } 38655845Sbostic dp = dissect(m, sp, rest, ssub, esub); 38755845Sbostic assert(dp == rest); 38855845Sbostic sp = rest; 38955845Sbostic break; 39055845Sbostic case O_PLUS: 39155845Sbostic case O_QUEST: 39255845Sbostic case OOR1: 39355845Sbostic case OOR2: 39455845Sbostic case O_CH: 39555845Sbostic assert(0); 39655845Sbostic break; 39755845Sbostic case OLPAREN: 39855845Sbostic i = OPND(m->g->strip[ss]); 39955845Sbostic m->pmatch[i].rm_so = sp - m->offp; 40055845Sbostic break; 40155845Sbostic case ORPAREN: 40255845Sbostic i = OPND(m->g->strip[ss]); 40355845Sbostic m->pmatch[i].rm_eo = sp - m->offp; 40455845Sbostic break; 40555845Sbostic default: /* uh oh */ 40655845Sbostic assert(0); 40755845Sbostic break; 40855845Sbostic } 40955845Sbostic } 41055845Sbostic 41155845Sbostic assert(sp == stop); 41255845Sbostic return(sp); 41355845Sbostic } 41455845Sbostic 41555845Sbostic /* 41655845Sbostic - backref - figure out what matched what, figuring in back references 41755845Sbostic */ 41855845Sbostic static uchar * /* == stop (success) or NULL (failure) */ 41955845Sbostic backref(m, start, stop, startst, stopst, lev) 42055845Sbostic register struct match *m; 42155845Sbostic uchar *start; 42255845Sbostic uchar *stop; 42355845Sbostic sopno startst; 42455845Sbostic sopno stopst; 42555845Sbostic sopno lev; /* PLUS nesting level */ 42655845Sbostic { 42755845Sbostic register int i; 42855845Sbostic register sopno ss; /* start sop of current subRE */ 42955845Sbostic register uchar *sp; /* start of string matched by it */ 43055845Sbostic register sopno ssub; /* start sop of subsubRE */ 43155845Sbostic register sopno esub; /* end sop of subsubRE */ 43255845Sbostic register uchar *ssp; /* start of string matched by subsubRE */ 43355845Sbostic register uchar *dp; 43455845Sbostic register size_t len; 43555845Sbostic register int hard; 43655845Sbostic register sop s; 43755845Sbostic register regoff_t offsave; 43855845Sbostic register cset *cs; 43955845Sbostic 44055845Sbostic DO("back", start, stop, startst, stopst); 44155845Sbostic sp = start; 44255845Sbostic 44355845Sbostic /* get as far as we can with easy stuff */ 44455845Sbostic hard = 0; 44555845Sbostic for (ss = startst; !hard && ss < stopst; ss++) 44655845Sbostic switch (OP(s = m->g->strip[ss])) { 44755845Sbostic case OCHAR: 44855845Sbostic if (sp == stop || *sp++ != OPND(s)) 44955845Sbostic return(NULL); 45055845Sbostic break; 45155845Sbostic case OANY: 45255845Sbostic if (sp == stop) 45355845Sbostic return(NULL); 45455845Sbostic sp++; 45555845Sbostic break; 45655845Sbostic case OANYOF: 45755845Sbostic cs = &m->g->sets[OPND(s)]; 45855845Sbostic if (sp == stop || !CHIN(cs, *sp++)) 45955845Sbostic return(NULL); 46055845Sbostic break; 46155845Sbostic case OBOL: 46255845Sbostic if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 46355845Sbostic (sp < m->endp && *(sp-1) == '\n' && 46455845Sbostic (m->g->cflags®_NEWLINE)) ) 46555845Sbostic { /* yes */ } 46655845Sbostic else 46755845Sbostic return(NULL); 46855845Sbostic break; 46955845Sbostic case OEOL: 47055845Sbostic if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 47155845Sbostic (sp < m->endp && *sp == '\n' && 47255845Sbostic (m->g->cflags®_NEWLINE)) ) 47355845Sbostic { /* yes */ } 47455845Sbostic else 47555845Sbostic return(NULL); 47655845Sbostic break; 47755845Sbostic case O_QUEST: 47855845Sbostic break; 47955845Sbostic case OOR1: /* matches null but needs to skip */ 48055845Sbostic ss++; 48155845Sbostic s = m->g->strip[ss]; 48255845Sbostic do { 48355845Sbostic assert(OP(s) == OOR2); 48455845Sbostic ss += OPND(s); 48555845Sbostic } while (OP(s = m->g->strip[ss]) != O_CH); 48655845Sbostic /* note that the ss++ gets us past the O_CH */ 48755845Sbostic break; 48855845Sbostic default: /* have to make a choice */ 48955845Sbostic hard = 1; 49055845Sbostic break; 49155845Sbostic } 49255845Sbostic if (!hard) { /* that was it! */ 49355845Sbostic if (sp != stop) 49455845Sbostic return(NULL); 49555845Sbostic return(sp); 49655845Sbostic } 49755845Sbostic ss--; /* adjust for the for's final increment */ 49855845Sbostic 49955845Sbostic /* the hard stuff */ 50055845Sbostic DO("hard", sp, stop, ss, stopst); 50155845Sbostic s = m->g->strip[ss]; 50255845Sbostic switch (OP(s)) { 50355845Sbostic case OBACK_: /* the vilest depths */ 50455845Sbostic i = OPND(s); 50555845Sbostic if (m->pmatch[i].rm_eo == -1) 50655845Sbostic return(NULL); 50755845Sbostic assert(m->pmatch[i].rm_so != -1); 50855845Sbostic len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 50955845Sbostic assert(stop - m->beginp >= len); 51055845Sbostic if (sp > stop - len) 51155845Sbostic return(NULL); /* not enough left to match */ 51255845Sbostic ssp = m->offp + m->pmatch[i].rm_so; 51355845Sbostic if (strncmp((char *)sp, (char *)ssp, len) != 0) 51455845Sbostic return(NULL); 51555845Sbostic while (m->g->strip[ss] != SOP(O_BACK, i)) 51655845Sbostic ss++; 51755845Sbostic return(backref(m, sp+len, stop, ss+1, stopst, lev)); 51855845Sbostic break; 51955845Sbostic case OQUEST_: /* to null or not */ 52055845Sbostic dp = backref(m, sp, stop, ss+1, stopst, lev); 52155845Sbostic if (dp != NULL) 52255845Sbostic return(dp); /* not */ 52355845Sbostic return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 52455845Sbostic break; 52555845Sbostic case OPLUS_: 52655845Sbostic assert(m->lastpos != NULL); 52755845Sbostic assert(lev+1 <= m->g->nplus); 52855845Sbostic m->lastpos[lev+1] = sp; 52955845Sbostic return(backref(m, sp, stop, ss+1, stopst, lev+1)); 53055845Sbostic break; 53155845Sbostic case O_PLUS: 53255845Sbostic if (sp == m->lastpos[lev]) /* last pass matched null */ 53355845Sbostic return(backref(m, sp, stop, ss+1, stopst, lev-1)); 53455845Sbostic /* try another pass */ 53555845Sbostic m->lastpos[lev] = sp; 53655845Sbostic dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 53755845Sbostic if (dp == NULL) 53855845Sbostic return(backref(m, sp, stop, ss+1, stopst, lev-1)); 53955845Sbostic else 54055845Sbostic return(dp); 54155845Sbostic break; 54255845Sbostic case OCH_: /* find the right one, if any */ 54355845Sbostic ssub = ss + 1; 54455845Sbostic esub = ss + OPND(s) - 1; 54555845Sbostic assert(OP(m->g->strip[esub]) == OOR1); 54655845Sbostic for (;;) { /* find first matching branch */ 54755845Sbostic dp = backref(m, sp, stop, ssub, esub, lev); 54855845Sbostic if (dp != NULL) 54955845Sbostic return(dp); 55055845Sbostic /* that one missed, try next one */ 55155845Sbostic if (OP(m->g->strip[esub]) == O_CH) 55255845Sbostic return(NULL); /* there is none */ 55355845Sbostic esub++; 55455845Sbostic assert(OP(m->g->strip[esub]) == OOR2); 55555845Sbostic ssub = esub + 1; 55655845Sbostic esub += OPND(m->g->strip[esub]); 55755845Sbostic if (OP(m->g->strip[esub]) == OOR2) 55855845Sbostic esub--; 55955845Sbostic else 56055845Sbostic assert(OP(m->g->strip[esub]) == O_CH); 56155845Sbostic } 56255845Sbostic break; 56355845Sbostic case OLPAREN: /* must undo assignment if rest fails */ 56455845Sbostic i = OPND(s); 56555845Sbostic offsave = m->pmatch[i].rm_so; 56655845Sbostic m->pmatch[i].rm_so = sp - m->offp; 56755845Sbostic dp = backref(m, sp, stop, ss+1, stopst, lev); 56855845Sbostic if (dp != NULL) 56955845Sbostic return(dp); 57055845Sbostic m->pmatch[i].rm_so = offsave; 57155845Sbostic return(NULL); 57255845Sbostic break; 57355845Sbostic case ORPAREN: /* must undo assignment if rest fails */ 57455845Sbostic i = OPND(s); 57555845Sbostic offsave = m->pmatch[i].rm_eo; 57655845Sbostic m->pmatch[i].rm_eo = sp - m->offp; 57755845Sbostic dp = backref(m, sp, stop, ss+1, stopst, lev); 57855845Sbostic if (dp != NULL) 57955845Sbostic return(dp); 58055845Sbostic m->pmatch[i].rm_eo = offsave; 58155845Sbostic return(NULL); 58255845Sbostic break; 58355845Sbostic default: /* uh oh */ 58455845Sbostic assert(0); 58555845Sbostic break; 58655845Sbostic } 58755845Sbostic 58855845Sbostic /* "can't happen" */ 58955845Sbostic assert(0); 59055845Sbostic /* NOTREACHED */ 59155845Sbostic } 59255845Sbostic 59355845Sbostic /* 59455845Sbostic - fast - step through the string at top speed 59555845Sbostic */ 59655845Sbostic static uchar * /* where tentative match ended, or NULL */ 59755845Sbostic fast(m, start, stop, startst, stopst) 59855845Sbostic register struct match *m; 59955845Sbostic uchar *start; 60055845Sbostic uchar *stop; 60155845Sbostic sopno startst; 60255845Sbostic sopno stopst; 60355845Sbostic { 60455845Sbostic register states st = m->st; 60555845Sbostic register states fresh = m->fresh; 60655845Sbostic register states tmp = m->tmp; 60755845Sbostic register uchar *p = start; 60855845Sbostic register uchar c; 60955845Sbostic register uchar lastc; /* previous c */ 61055845Sbostic register int atbol; 61155845Sbostic register int ateol; 61255845Sbostic register uchar *coldp; /* last p after which no match was underway */ 61355845Sbostic 61455845Sbostic CLEAR(st); 61555845Sbostic SET1(st, startst); 61655845Sbostic st = expand(m->g, startst, stopst, st, 0, 0); 61755845Sbostic ASSIGN(fresh, st); 61855845Sbostic SP("start", st, *p); 61955845Sbostic c = '\0'; 62055845Sbostic coldp = NULL; 62155845Sbostic for (;;) { 62255845Sbostic /* next character */ 62355845Sbostic lastc = c; 62455845Sbostic c = (p == m->endp) ? '\0' : *p; 62555845Sbostic if (EQ(st, fresh)) 62655845Sbostic coldp = p; 62755845Sbostic 62855845Sbostic /* is there an EOL and/or BOL between lastc and c? */ 62955845Sbostic atbol = ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 63055845Sbostic (lastc == '\0' && !(m->eflags®_NOTBOL)) ); 63155845Sbostic ateol = ( (c == '\n' && m->g->cflags®_NEWLINE) || 63255845Sbostic (c == '\0' && !(m->eflags®_NOTEOL)) ); 63355845Sbostic if (atbol || ateol) { 63455845Sbostic st = expand(m->g, startst, stopst, st, atbol, ateol); 63555845Sbostic SP("bef", st, c); 63655845Sbostic } 63755845Sbostic 63855845Sbostic /* are we done? */ 63955845Sbostic if (ISSET(st, stopst) || p == stop) 64055845Sbostic break; /* NOTE BREAK OUT */ 64155845Sbostic 64255845Sbostic /* no, we must deal with this character */ 64355845Sbostic ASSIGN(tmp, st); 64455845Sbostic ASSIGN(st, fresh); 64555845Sbostic st = step(m->g, startst, stopst, tmp, c, st); 64655845Sbostic SP("aft", st, c); 64755845Sbostic assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st)); 64855845Sbostic p++; 64955845Sbostic } 65055845Sbostic 65155845Sbostic assert(coldp != NULL); 65255845Sbostic m->coldp = coldp; 65355845Sbostic if (ISSET(st, stopst)) 65455845Sbostic return(p+1); 65555845Sbostic else 65655845Sbostic return(NULL); 65755845Sbostic } 65855845Sbostic 65955845Sbostic /* 66055845Sbostic - slow - step through the string more deliberately 66155845Sbostic */ 66255845Sbostic static uchar * /* where it ended */ 66355845Sbostic slow(m, start, stop, startst, stopst) 66455845Sbostic register struct match *m; 66555845Sbostic uchar *start; 66655845Sbostic uchar *stop; 66755845Sbostic sopno startst; 66855845Sbostic sopno stopst; 66955845Sbostic { 67055845Sbostic register states st = m->st; 67155845Sbostic register states empty = m->empty; 67255845Sbostic register states tmp = m->tmp; 67355845Sbostic register uchar *p = start; 67455845Sbostic register uchar c = (start == m->beginp) ? '\0' : *(start-1); 67555845Sbostic register uchar lastc; /* previous c */ 67655845Sbostic register int atbol; 67755845Sbostic register int ateol; 67855845Sbostic register uchar *matchp; /* last p at which a match ended */ 67955845Sbostic 68055845Sbostic DO("slow", start, stop, startst, stopst); 68155845Sbostic CLEAR(st); 68255845Sbostic SET1(st, startst); 68355845Sbostic SP("sstart", st, *p); 68455845Sbostic matchp = NULL; 68555845Sbostic for (;;) { 68655845Sbostic /* next character */ 68755845Sbostic lastc = c; 68855845Sbostic c = (p == m->endp) ? '\0' : *p; 68955845Sbostic 69055845Sbostic /* is there an EOL and/or BOL between lastc and c? */ 69155845Sbostic atbol = ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 69255845Sbostic (lastc == '\0' && !(m->eflags®_NOTBOL)) ); 69355845Sbostic ateol = ( (c == '\n' && m->g->cflags®_NEWLINE) || 69455845Sbostic (c == '\0' && !(m->eflags®_NOTEOL)) ); 69555845Sbostic 69655845Sbostic /* do we need an expansion before looking at the char? */ 69755845Sbostic if (p == start || atbol || ateol) { 69855845Sbostic st = expand(m->g, startst, stopst, st, atbol, ateol); 69955845Sbostic SP("sbef", st, c); 70055845Sbostic } 70155845Sbostic 70255845Sbostic /* are we done? */ 70355845Sbostic if (ISSET(st, stopst)) 70455845Sbostic matchp = p; 70555845Sbostic if (EQ(st, empty) || p == stop) 70655845Sbostic break; /* NOTE BREAK OUT */ 70755845Sbostic 70855845Sbostic /* no, we must deal with this character */ 70955845Sbostic ASSIGN(tmp, st); 71055845Sbostic ASSIGN(st, empty); 71155845Sbostic st = step(m->g, startst, stopst, tmp, c, st); 71255845Sbostic SP("saft", st, c); 71355845Sbostic assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st)); 71455845Sbostic p++; 71555845Sbostic } 71655845Sbostic 71755845Sbostic return(matchp); 71855845Sbostic } 71955845Sbostic 72055845Sbostic 72155845Sbostic /* 72255845Sbostic - expand - return set of states reachable from an initial set 72355845Sbostic */ 72455845Sbostic static states 72555845Sbostic expand(g, start, stop, st, atbol, ateol) 72655845Sbostic register struct re_guts *g; 72755845Sbostic int start; /* start state within strip */ 72855845Sbostic int stop; /* state after stop state within strip */ 72955845Sbostic register states st; 73055845Sbostic int atbol; /* at start or just after \n? (for BOL) */ 73155845Sbostic int ateol; /* just before \n or \0? (for EOL) */ 73255845Sbostic { 73355845Sbostic register sop s; 73455845Sbostic register sopno pc; 73555845Sbostic register onestate here; /* note, macros know this name */ 73655845Sbostic register sopno look; 73755845Sbostic register int i; 73855845Sbostic 73955845Sbostic for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 74055845Sbostic s = g->strip[pc]; 74155845Sbostic switch (OP(s)) { 74255845Sbostic case OEND: 74355845Sbostic assert(pc == stop-1); 74455845Sbostic break; 74555845Sbostic case OCHAR: /* can't get past this */ 74655845Sbostic break; 74755845Sbostic case OBOL: 74855845Sbostic if (atbol) 74955845Sbostic FWD(st, st, 1); 75055845Sbostic break; 75155845Sbostic case OEOL: 75255845Sbostic if (ateol) 75355845Sbostic FWD(st, st, 1); 75455845Sbostic break; 75555845Sbostic case OANY: /* can't get past this either */ 75655845Sbostic break; 75755845Sbostic case OANYOF: /* or this */ 75855845Sbostic break; 75955845Sbostic case OBACK_: /* not significant here */ 76055845Sbostic case O_BACK: 76155845Sbostic FWD(st, st, 1); 76255845Sbostic break; 76355845Sbostic case OPLUS_: /* forward, this is just an empty */ 76455845Sbostic FWD(st, st, 1); 76555845Sbostic break; 76655845Sbostic case O_PLUS: /* both forward and back */ 76755845Sbostic FWD(st, st, 1); 76855845Sbostic i = ISSETBACK(st, OPND(s)); 76955845Sbostic BACK(st, st, OPND(s)); 77055845Sbostic if (!i && ISSETBACK(st, OPND(s))) { 77155845Sbostic /* oho, must reconsider loop body */ 77255845Sbostic pc -= OPND(s) + 1; 77355845Sbostic INIT(here, pc); 77455845Sbostic } 77555845Sbostic break; 77655845Sbostic case OQUEST_: /* two branches, both forward */ 77755845Sbostic FWD(st, st, 1); 77855845Sbostic FWD(st, st, OPND(s)); 77955845Sbostic break; 78055845Sbostic case O_QUEST: /* just an empty */ 78155845Sbostic FWD(st, st, 1); 78255845Sbostic break; 78355845Sbostic case OLPAREN: /* not significant here */ 78455845Sbostic case ORPAREN: 78555845Sbostic FWD(st, st, 1); 78655845Sbostic break; 78755845Sbostic case OCH_: /* mark the first two branches */ 78855845Sbostic FWD(st, st, 1); 78955845Sbostic assert(OP(g->strip[pc+OPND(s)]) == OOR2); 79055845Sbostic FWD(st, st, OPND(s)); 79155845Sbostic break; 79255845Sbostic case OOR1: /* done a branch, find the O_CH */ 79355845Sbostic if (ISSTATEIN(st, here)) { 79455845Sbostic for (look = 1; 79555845Sbostic OP(s = g->strip[pc+look]) != O_CH; 79655845Sbostic look += OPND(s)) 79755845Sbostic assert(OP(s) == OOR2); 79855845Sbostic FWD(st, st, look); 79955845Sbostic } 80055845Sbostic break; 80155845Sbostic case OOR2: /* propagate OCH_'s marking onward */ 80255845Sbostic FWD(st, st, 1); 80355845Sbostic if (OP(g->strip[pc+OPND(s)]) != O_CH) { 80455845Sbostic assert(OP(g->strip[pc+OPND(s)]) == OOR2); 80555845Sbostic FWD(st, st, OPND(s)); 80655845Sbostic } 80755845Sbostic break; 80855845Sbostic case O_CH: /* just an empty */ 80955845Sbostic FWD(st, st, 1); 81055845Sbostic break; 81155845Sbostic default: /* ooooops... */ 81255845Sbostic assert(0); 81355845Sbostic break; 81455845Sbostic } 81555845Sbostic } 81655845Sbostic 81755845Sbostic return(st); 81855845Sbostic } 81955845Sbostic 82055845Sbostic /* 82155845Sbostic - step - map set of states reachable before char to set reachable after 82255845Sbostic */ 82355845Sbostic static states 82455845Sbostic step(g, start, stop, bef, ch, aft) 82555845Sbostic register struct re_guts *g; 82655845Sbostic int start; /* start state within strip */ 82755845Sbostic int stop; /* state after stop state within strip */ 82855845Sbostic register states bef; /* states reachable before */ 82955845Sbostic uchar ch; 83055845Sbostic register states aft; /* states already known reachable after */ 83155845Sbostic { 83255845Sbostic register cset *cs; 83355845Sbostic register sop s; 83455845Sbostic register sopno pc; 83555845Sbostic register onestate here; /* note, macros know this name */ 83655845Sbostic register sopno look; 83755845Sbostic register int i; 83855845Sbostic 83955845Sbostic for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 84055845Sbostic s = g->strip[pc]; 84155845Sbostic switch (OP(s)) { 84255845Sbostic case OEND: 84355845Sbostic assert(pc == stop-1); 84455845Sbostic break; 84555845Sbostic case OCHAR: 84655845Sbostic if (ch == OPND(s)) 84755845Sbostic FWD(aft, bef, 1); 84855845Sbostic break; 84955845Sbostic case OBOL: /* these are looked after by expand() */ 85055845Sbostic case OEOL: 85155845Sbostic break; 85255845Sbostic case OANY: 85355845Sbostic FWD(aft, bef, 1); 85455845Sbostic break; 85555845Sbostic case OANYOF: 85655845Sbostic cs = &g->sets[OPND(s)]; 85755845Sbostic if (CHIN(cs, ch)) 85855845Sbostic FWD(aft, bef, 1); 85955845Sbostic break; 86055845Sbostic case OBACK_: /* ignored here */ 86155845Sbostic case O_BACK: 86255845Sbostic FWD(aft, aft, 1); 86355845Sbostic break; 86455845Sbostic case OPLUS_: /* forward, this is just an empty */ 86555845Sbostic FWD(aft, aft, 1); 86655845Sbostic break; 86755845Sbostic case O_PLUS: /* both forward and back */ 86855845Sbostic FWD(aft, aft, 1); 86955845Sbostic i = ISSETBACK(aft, OPND(s)); 87055845Sbostic BACK(aft, aft, OPND(s)); 87155845Sbostic if (!i && ISSETBACK(aft, OPND(s))) { 87255845Sbostic /* oho, must reconsider loop body */ 87355845Sbostic pc -= OPND(s) + 1; 87455845Sbostic INIT(here, pc); 87555845Sbostic } 87655845Sbostic break; 87755845Sbostic case OQUEST_: /* two branches, both forward */ 87855845Sbostic FWD(aft, aft, 1); 87955845Sbostic FWD(aft, aft, OPND(s)); 88055845Sbostic break; 88155845Sbostic case O_QUEST: /* just an empty */ 88255845Sbostic FWD(aft, aft, 1); 88355845Sbostic break; 88455845Sbostic case OLPAREN: /* not significant here */ 88555845Sbostic case ORPAREN: 88655845Sbostic FWD(aft, aft, 1); 88755845Sbostic break; 88855845Sbostic case OCH_: /* mark the first two branches */ 88955845Sbostic FWD(aft, aft, 1); 89055845Sbostic assert(OP(g->strip[pc+OPND(s)]) == OOR2); 89155845Sbostic FWD(aft, aft, OPND(s)); 89255845Sbostic break; 89355845Sbostic case OOR1: /* done a branch, find the O_CH */ 89455845Sbostic if (ISSTATEIN(aft, here)) { 89555845Sbostic for (look = 1; 89655845Sbostic OP(s = g->strip[pc+look]) != O_CH; 89755845Sbostic look += OPND(s)) 89855845Sbostic assert(OP(s) == OOR2); 89955845Sbostic FWD(aft, aft, look); 90055845Sbostic } 90155845Sbostic break; 90255845Sbostic case OOR2: /* propagate OCH_'s marking */ 90355845Sbostic FWD(aft, aft, 1); 90455845Sbostic if (OP(g->strip[pc+OPND(s)]) != O_CH) { 90555845Sbostic assert(OP(g->strip[pc+OPND(s)]) == OOR2); 90655845Sbostic FWD(aft, aft, OPND(s)); 90755845Sbostic } 90855845Sbostic break; 90955845Sbostic case O_CH: /* just empty */ 91055845Sbostic FWD(aft, aft, 1); 91155845Sbostic break; 91255845Sbostic default: /* ooooops... */ 91355845Sbostic assert(0); 91455845Sbostic break; 91555845Sbostic } 91655845Sbostic } 91755845Sbostic 91855845Sbostic return(aft); 91955845Sbostic } 92055845Sbostic 92155845Sbostic #ifndef NDEBUG 92255845Sbostic /* 92355845Sbostic - print - print a set of states 92455845Sbostic */ 92555845Sbostic static void 92655845Sbostic print(g, caption, st, ch, d) 92755845Sbostic struct re_guts *g; 92855845Sbostic char *caption; 92955845Sbostic states st; 93055845Sbostic uchar ch; 93155845Sbostic FILE *d; 93255845Sbostic { 93355845Sbostic register int i; 93455845Sbostic register int first = 1; 93555845Sbostic 93655845Sbostic fprintf(d, "%s", caption); 93755845Sbostic if (ch != '\0') 93855845Sbostic fprintf(d, " %s", regchar(ch)); 93955845Sbostic for (i = 0; i < g->nstates; i++) 94055845Sbostic if (ISSET(st, i)) { 94155845Sbostic fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 94255845Sbostic first = 0; 94355845Sbostic } 94455845Sbostic fprintf(d, "\n"); 94555845Sbostic } 94655845Sbostic #endif 94755845Sbostic 94855845Sbostic #undef matcher 94955845Sbostic #undef fast 95055845Sbostic #undef slow 95155845Sbostic #undef dissect 95255845Sbostic #undef backref 95355845Sbostic #undef expand 95455845Sbostic #undef step 95555845Sbostic #undef print 95655845Sbostic #undef match 957