13e12c5d1SDavid du Colombier #include <u.h>
23e12c5d1SDavid du Colombier #include <libc.h>
33e12c5d1SDavid du Colombier #include "regexp.h"
43e12c5d1SDavid du Colombier #include "regcomp.h"
53e12c5d1SDavid du Colombier
63e12c5d1SDavid du Colombier
73e12c5d1SDavid du Colombier /*
83e12c5d1SDavid du Colombier * return 0 if no match
93e12c5d1SDavid du Colombier * >0 if a match
103e12c5d1SDavid du Colombier * <0 if we ran out of _relist space
113e12c5d1SDavid du Colombier */
123e12c5d1SDavid du Colombier static int
regexec1(Reprog * progp,char * bol,Resub * mp,int ms,Reljunk * j)133e12c5d1SDavid du Colombier regexec1(Reprog *progp, /* program to run */
143e12c5d1SDavid du Colombier char *bol, /* string to run machine on */
153e12c5d1SDavid du Colombier Resub *mp, /* subexpression elements */
163e12c5d1SDavid du Colombier int ms, /* number of elements at mp */
17219b2ee8SDavid du Colombier Reljunk *j
18219b2ee8SDavid du Colombier )
193e12c5d1SDavid du Colombier {
203e12c5d1SDavid du Colombier int flag=0;
213e12c5d1SDavid du Colombier Reinst *inst;
223e12c5d1SDavid du Colombier Relist *tlp;
233e12c5d1SDavid du Colombier char *s;
243e12c5d1SDavid du Colombier int i, checkstart;
253e12c5d1SDavid du Colombier Rune r, *rp, *ep;
263e12c5d1SDavid du Colombier int n;
273e12c5d1SDavid du Colombier Relist* tl; /* This list, next list */
283e12c5d1SDavid du Colombier Relist* nl;
293e12c5d1SDavid du Colombier Relist* tle; /* ends of this and next list */
303e12c5d1SDavid du Colombier Relist* nle;
313e12c5d1SDavid du Colombier int match;
323e12c5d1SDavid du Colombier char *p;
333e12c5d1SDavid du Colombier
343e12c5d1SDavid du Colombier match = 0;
35219b2ee8SDavid du Colombier checkstart = j->starttype;
36219b2ee8SDavid du Colombier if(mp)
37219b2ee8SDavid du Colombier for(i=0; i<ms; i++) {
38219b2ee8SDavid du Colombier mp[i].sp = 0;
39219b2ee8SDavid du Colombier mp[i].ep = 0;
40219b2ee8SDavid du Colombier }
41219b2ee8SDavid du Colombier j->relist[0][0].inst = 0;
42219b2ee8SDavid du Colombier j->relist[1][0].inst = 0;
433e12c5d1SDavid du Colombier
443e12c5d1SDavid du Colombier /* Execute machine once for each character, including terminal NUL */
45219b2ee8SDavid du Colombier s = j->starts;
463e12c5d1SDavid du Colombier do{
473e12c5d1SDavid du Colombier /* fast check for first char */
483e12c5d1SDavid du Colombier if(checkstart) {
49219b2ee8SDavid du Colombier switch(j->starttype) {
50219b2ee8SDavid du Colombier case RUNE:
51219b2ee8SDavid du Colombier p = utfrune(s, j->startchar);
52e34dbc32SDavid du Colombier if(p == 0 || s == j->eol)
533e12c5d1SDavid du Colombier return match;
543e12c5d1SDavid du Colombier s = p;
55219b2ee8SDavid du Colombier break;
56219b2ee8SDavid du Colombier case BOL:
57219b2ee8SDavid du Colombier if(s == bol)
58219b2ee8SDavid du Colombier break;
59219b2ee8SDavid du Colombier p = utfrune(s, '\n');
60e34dbc32SDavid du Colombier if(p == 0 || s == j->eol)
61219b2ee8SDavid du Colombier return match;
627366567fSDavid du Colombier s = p+1;
63219b2ee8SDavid du Colombier break;
64219b2ee8SDavid du Colombier }
653e12c5d1SDavid du Colombier }
663e12c5d1SDavid du Colombier r = *(uchar*)s;
673e12c5d1SDavid du Colombier if(r < Runeself)
683e12c5d1SDavid du Colombier n = 1;
693e12c5d1SDavid du Colombier else
703e12c5d1SDavid du Colombier n = chartorune(&r, s);
713e12c5d1SDavid du Colombier
723e12c5d1SDavid du Colombier /* switch run lists */
73219b2ee8SDavid du Colombier tl = j->relist[flag];
74219b2ee8SDavid du Colombier tle = j->reliste[flag];
75219b2ee8SDavid du Colombier nl = j->relist[flag^=1];
76219b2ee8SDavid du Colombier nle = j->reliste[flag];
773e12c5d1SDavid du Colombier nl->inst = 0;
783e12c5d1SDavid du Colombier
793e12c5d1SDavid du Colombier /* Add first instruction to current list */
80219b2ee8SDavid du Colombier if(match == 0)
817dd7cddfSDavid du Colombier _renewemptythread(tl, progp->startinst, ms, s);
823e12c5d1SDavid du Colombier
833e12c5d1SDavid du Colombier /* Execute machine until current list is empty */
843e12c5d1SDavid du Colombier for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
853e12c5d1SDavid du Colombier for(inst = tlp->inst; ; inst = inst->next){
863e12c5d1SDavid du Colombier switch(inst->type){
873e12c5d1SDavid du Colombier case RUNE: /* regular character */
88219b2ee8SDavid du Colombier if(inst->r == r){
897dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
903e12c5d1SDavid du Colombier return -1;
91219b2ee8SDavid du Colombier }
923e12c5d1SDavid du Colombier break;
933e12c5d1SDavid du Colombier case LBRA:
943e12c5d1SDavid du Colombier tlp->se.m[inst->subid].sp = s;
953e12c5d1SDavid du Colombier continue;
963e12c5d1SDavid du Colombier case RBRA:
973e12c5d1SDavid du Colombier tlp->se.m[inst->subid].ep = s;
983e12c5d1SDavid du Colombier continue;
993e12c5d1SDavid du Colombier case ANY:
1003e12c5d1SDavid du Colombier if(r != '\n')
1017dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
1023e12c5d1SDavid du Colombier return -1;
1033e12c5d1SDavid du Colombier break;
1043e12c5d1SDavid du Colombier case ANYNL:
1057dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
1063e12c5d1SDavid du Colombier return -1;
1073e12c5d1SDavid du Colombier break;
1083e12c5d1SDavid du Colombier case BOL:
1093e12c5d1SDavid du Colombier if(s == bol || *(s-1) == '\n')
1103e12c5d1SDavid du Colombier continue;
1113e12c5d1SDavid du Colombier break;
1123e12c5d1SDavid du Colombier case EOL:
1137dd7cddfSDavid du Colombier if(s == j->eol || r == 0 || r == '\n')
1143e12c5d1SDavid du Colombier continue;
1153e12c5d1SDavid du Colombier break;
1163e12c5d1SDavid du Colombier case CCLASS:
1173e12c5d1SDavid du Colombier ep = inst->cp->end;
1183e12c5d1SDavid du Colombier for(rp = inst->cp->spans; rp < ep; rp += 2)
1193e12c5d1SDavid du Colombier if(r >= rp[0] && r <= rp[1]){
1207dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
1213e12c5d1SDavid du Colombier return -1;
1223e12c5d1SDavid du Colombier break;
1233e12c5d1SDavid du Colombier }
1243e12c5d1SDavid du Colombier break;
1253e12c5d1SDavid du Colombier case NCCLASS:
1263e12c5d1SDavid du Colombier ep = inst->cp->end;
1273e12c5d1SDavid du Colombier for(rp = inst->cp->spans; rp < ep; rp += 2)
1283e12c5d1SDavid du Colombier if(r >= rp[0] && r <= rp[1])
1293e12c5d1SDavid du Colombier break;
1303e12c5d1SDavid du Colombier if(rp == ep)
1317dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
1323e12c5d1SDavid du Colombier return -1;
1333e12c5d1SDavid du Colombier break;
1343e12c5d1SDavid du Colombier case OR:
1353e12c5d1SDavid du Colombier /* evaluate right choice later */
136*e7f22645SDavid du Colombier if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
1373e12c5d1SDavid du Colombier return -1;
1383e12c5d1SDavid du Colombier /* efficiency: advance and re-evaluate */
1393e12c5d1SDavid du Colombier continue;
1403e12c5d1SDavid du Colombier case END: /* Match! */
1413e12c5d1SDavid du Colombier match = 1;
1423e12c5d1SDavid du Colombier tlp->se.m[0].ep = s;
1433e12c5d1SDavid du Colombier if(mp != 0)
1443e12c5d1SDavid du Colombier _renewmatch(mp, ms, &tlp->se);
1453e12c5d1SDavid du Colombier break;
1463e12c5d1SDavid du Colombier }
1473e12c5d1SDavid du Colombier break;
1483e12c5d1SDavid du Colombier }
1493e12c5d1SDavid du Colombier }
1507dd7cddfSDavid du Colombier if(s == j->eol)
1517dd7cddfSDavid du Colombier break;
152219b2ee8SDavid du Colombier checkstart = j->starttype && nl->inst==0;
1533e12c5d1SDavid du Colombier s += n;
1543e12c5d1SDavid du Colombier }while(r);
1553e12c5d1SDavid du Colombier return match;
1563e12c5d1SDavid du Colombier }
1573e12c5d1SDavid du Colombier
158219b2ee8SDavid du Colombier static int
regexec2(Reprog * progp,char * bol,Resub * mp,int ms,Reljunk * j)159219b2ee8SDavid du Colombier regexec2(Reprog *progp, /* program to run */
160219b2ee8SDavid du Colombier char *bol, /* string to run machine on */
161219b2ee8SDavid du Colombier Resub *mp, /* subexpression elements */
162219b2ee8SDavid du Colombier int ms, /* number of elements at mp */
163219b2ee8SDavid du Colombier Reljunk *j
164219b2ee8SDavid du Colombier )
165219b2ee8SDavid du Colombier {
1667dd7cddfSDavid du Colombier int rv;
1677dd7cddfSDavid du Colombier Relist *relist0, *relist1;
168219b2ee8SDavid du Colombier
169219b2ee8SDavid du Colombier /* mark space */
1707dd7cddfSDavid du Colombier relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
1717dd7cddfSDavid du Colombier if(relist0 == nil)
1727dd7cddfSDavid du Colombier return -1;
1737dd7cddfSDavid du Colombier relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
1747dd7cddfSDavid du Colombier if(relist1 == nil){
1757dd7cddfSDavid du Colombier free(relist1);
1767dd7cddfSDavid du Colombier return -1;
1777dd7cddfSDavid du Colombier }
178219b2ee8SDavid du Colombier j->relist[0] = relist0;
179219b2ee8SDavid du Colombier j->relist[1] = relist1;
1807dd7cddfSDavid du Colombier j->reliste[0] = relist0 + BIGLISTSIZE - 2;
1817dd7cddfSDavid du Colombier j->reliste[1] = relist1 + BIGLISTSIZE - 2;
182219b2ee8SDavid du Colombier
1837dd7cddfSDavid du Colombier rv = regexec1(progp, bol, mp, ms, j);
1847dd7cddfSDavid du Colombier free(relist0);
1857dd7cddfSDavid du Colombier free(relist1);
1867dd7cddfSDavid du Colombier return rv;
187219b2ee8SDavid du Colombier }
188219b2ee8SDavid du Colombier
1893e12c5d1SDavid du Colombier extern int
regexec(Reprog * progp,char * bol,Resub * mp,int ms)1903e12c5d1SDavid du Colombier regexec(Reprog *progp, /* program to run */
1913e12c5d1SDavid du Colombier char *bol, /* string to run machine on */
1923e12c5d1SDavid du Colombier Resub *mp, /* subexpression elements */
1933e12c5d1SDavid du Colombier int ms) /* number of elements at mp */
1943e12c5d1SDavid du Colombier {
195219b2ee8SDavid du Colombier Reljunk j;
196219b2ee8SDavid du Colombier Relist relist0[LISTSIZE], relist1[LISTSIZE];
1973e12c5d1SDavid du Colombier int rv;
1983e12c5d1SDavid du Colombier
1993e12c5d1SDavid du Colombier /*
2003e12c5d1SDavid du Colombier * use user-specified starting/ending location if specified
2013e12c5d1SDavid du Colombier */
202219b2ee8SDavid du Colombier j.starts = bol;
203219b2ee8SDavid du Colombier j.eol = 0;
2043e12c5d1SDavid du Colombier if(mp && ms>0){
2053e12c5d1SDavid du Colombier if(mp->sp)
206219b2ee8SDavid du Colombier j.starts = mp->sp;
2073e12c5d1SDavid du Colombier if(mp->ep)
208219b2ee8SDavid du Colombier j.eol = mp->ep;
2093e12c5d1SDavid du Colombier }
210219b2ee8SDavid du Colombier j.starttype = 0;
211219b2ee8SDavid du Colombier j.startchar = 0;
212219b2ee8SDavid du Colombier if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
213219b2ee8SDavid du Colombier j.starttype = RUNE;
214219b2ee8SDavid du Colombier j.startchar = progp->startinst->r;
215219b2ee8SDavid du Colombier }
216219b2ee8SDavid du Colombier if(progp->startinst->type == BOL)
217219b2ee8SDavid du Colombier j.starttype = BOL;
2183e12c5d1SDavid du Colombier
219219b2ee8SDavid du Colombier /* mark space */
220219b2ee8SDavid du Colombier j.relist[0] = relist0;
221219b2ee8SDavid du Colombier j.relist[1] = relist1;
222219b2ee8SDavid du Colombier j.reliste[0] = relist0 + nelem(relist0) - 2;
223219b2ee8SDavid du Colombier j.reliste[1] = relist1 + nelem(relist1) - 2;
224219b2ee8SDavid du Colombier
225219b2ee8SDavid du Colombier rv = regexec1(progp, bol, mp, ms, &j);
2263e12c5d1SDavid du Colombier if(rv >= 0)
2273e12c5d1SDavid du Colombier return rv;
228219b2ee8SDavid du Colombier rv = regexec2(progp, bol, mp, ms, &j);
229219b2ee8SDavid du Colombier if(rv >= 0)
230219b2ee8SDavid du Colombier return rv;
2313e12c5d1SDavid du Colombier return -1;
2323e12c5d1SDavid du Colombier }
233