13e12c5d1SDavid du Colombier #include <u.h>
23e12c5d1SDavid du Colombier #include <libc.h>
33e12c5d1SDavid du Colombier #include "regexp.h"
43e12c5d1SDavid du Colombier #include "regcomp.h"
53e12c5d1SDavid du Colombier
63e12c5d1SDavid du Colombier /*
73e12c5d1SDavid du Colombier * return 0 if no match
83e12c5d1SDavid du Colombier * >0 if a match
93e12c5d1SDavid du Colombier * <0 if we ran out of _relist space
103e12c5d1SDavid du Colombier */
113e12c5d1SDavid du Colombier static int
rregexec1(Reprog * progp,Rune * bol,Resub * mp,int ms,Reljunk * j)123e12c5d1SDavid du Colombier rregexec1(Reprog *progp, /* program to run */
133e12c5d1SDavid du Colombier Rune *bol, /* string to run machine on */
143e12c5d1SDavid du Colombier Resub *mp, /* subexpression elements */
153e12c5d1SDavid du Colombier int ms, /* number of elements at mp */
16219b2ee8SDavid du Colombier Reljunk *j)
173e12c5d1SDavid du Colombier {
183e12c5d1SDavid du Colombier int flag=0;
193e12c5d1SDavid du Colombier Reinst *inst;
203e12c5d1SDavid du Colombier Relist *tlp;
213e12c5d1SDavid du Colombier Rune *s;
223e12c5d1SDavid du Colombier int i, checkstart;
233e12c5d1SDavid du Colombier Rune r, *rp, *ep;
243e12c5d1SDavid du Colombier Relist* tl; /* This list, next list */
253e12c5d1SDavid du Colombier Relist* nl;
263e12c5d1SDavid du Colombier Relist* tle; /* ends of this and next list */
273e12c5d1SDavid du Colombier Relist* nle;
283e12c5d1SDavid du Colombier int match;
297366567fSDavid du Colombier Rune *p;
303e12c5d1SDavid du Colombier
313e12c5d1SDavid du Colombier match = 0;
32219b2ee8SDavid du Colombier checkstart = j->startchar;
33219b2ee8SDavid du Colombier if(mp)
34219b2ee8SDavid du Colombier for(i=0; i<ms; i++) {
35219b2ee8SDavid du Colombier mp[i].rsp = 0;
36219b2ee8SDavid du Colombier mp[i].rep = 0;
37219b2ee8SDavid du Colombier }
38219b2ee8SDavid du Colombier j->relist[0][0].inst = 0;
39219b2ee8SDavid du Colombier j->relist[1][0].inst = 0;
403e12c5d1SDavid du Colombier
413e12c5d1SDavid du Colombier /* Execute machine once for each character, including terminal NUL */
42219b2ee8SDavid du Colombier s = j->rstarts;
433e12c5d1SDavid du Colombier do{
443e12c5d1SDavid du Colombier /* fast check for first char */
45219b2ee8SDavid du Colombier if(checkstart) {
46219b2ee8SDavid du Colombier switch(j->starttype) {
47219b2ee8SDavid du Colombier case RUNE:
487366567fSDavid du Colombier p = runestrchr(s, j->startchar);
497366567fSDavid du Colombier if(p == 0 || s == j->reol)
50219b2ee8SDavid du Colombier return match;
517366567fSDavid du Colombier s = p;
52219b2ee8SDavid du Colombier break;
53219b2ee8SDavid du Colombier case BOL:
54219b2ee8SDavid du Colombier if(s == bol)
55219b2ee8SDavid du Colombier break;
567366567fSDavid du Colombier p = runestrchr(s, '\n');
577366567fSDavid du Colombier if(p == 0 || s == j->reol)
58219b2ee8SDavid du Colombier return match;
597366567fSDavid du Colombier s = p+1;
60219b2ee8SDavid du Colombier break;
61219b2ee8SDavid du Colombier }
623e12c5d1SDavid du Colombier }
633e12c5d1SDavid du Colombier
64219b2ee8SDavid du Colombier r = *s;
65219b2ee8SDavid du Colombier
663e12c5d1SDavid du Colombier /* switch run lists */
67219b2ee8SDavid du Colombier tl = j->relist[flag];
68219b2ee8SDavid du Colombier tle = j->reliste[flag];
69219b2ee8SDavid du Colombier nl = j->relist[flag^=1];
70219b2ee8SDavid du Colombier nle = j->reliste[flag];
713e12c5d1SDavid du Colombier nl->inst = 0;
723e12c5d1SDavid du Colombier
733e12c5d1SDavid du Colombier /* Add first instruction to current list */
747dd7cddfSDavid du Colombier _rrenewemptythread(tl, progp->startinst, ms, s);
753e12c5d1SDavid du Colombier
763e12c5d1SDavid du Colombier /* Execute machine until current list is empty */
77219b2ee8SDavid du Colombier for(tlp=tl; tlp->inst; tlp++){
783e12c5d1SDavid du Colombier for(inst=tlp->inst; ; inst = inst->next){
793e12c5d1SDavid du Colombier switch(inst->type){
803e12c5d1SDavid du Colombier case RUNE: /* regular character */
813e12c5d1SDavid du Colombier if(inst->r == r)
827dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
833e12c5d1SDavid du Colombier return -1;
843e12c5d1SDavid du Colombier break;
853e12c5d1SDavid du Colombier case LBRA:
863e12c5d1SDavid du Colombier tlp->se.m[inst->subid].rsp = s;
873e12c5d1SDavid du Colombier continue;
883e12c5d1SDavid du Colombier case RBRA:
893e12c5d1SDavid du Colombier tlp->se.m[inst->subid].rep = s;
903e12c5d1SDavid du Colombier continue;
913e12c5d1SDavid du Colombier case ANY:
923e12c5d1SDavid du Colombier if(r != '\n')
937dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
943e12c5d1SDavid du Colombier return -1;
953e12c5d1SDavid du Colombier break;
963e12c5d1SDavid du Colombier case ANYNL:
977dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
983e12c5d1SDavid du Colombier return -1;
993e12c5d1SDavid du Colombier break;
1003e12c5d1SDavid du Colombier case BOL:
1013e12c5d1SDavid du Colombier if(s == bol || *(s-1) == '\n')
1023e12c5d1SDavid du Colombier continue;
1033e12c5d1SDavid du Colombier break;
1043e12c5d1SDavid du Colombier case EOL:
1057dd7cddfSDavid du Colombier if(s == j->reol || r == 0 || r == '\n')
1063e12c5d1SDavid du Colombier continue;
1073e12c5d1SDavid du Colombier break;
1083e12c5d1SDavid du Colombier case CCLASS:
1093e12c5d1SDavid du Colombier ep = inst->cp->end;
1103e12c5d1SDavid du Colombier for(rp = inst->cp->spans; rp < ep; rp += 2)
1113e12c5d1SDavid du Colombier if(r >= rp[0] && r <= rp[1]){
1127dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
1133e12c5d1SDavid du Colombier return -1;
1143e12c5d1SDavid du Colombier break;
1153e12c5d1SDavid du Colombier }
1163e12c5d1SDavid du Colombier break;
1173e12c5d1SDavid du Colombier case NCCLASS:
1183e12c5d1SDavid du Colombier ep = inst->cp->end;
1193e12c5d1SDavid du Colombier for(rp = inst->cp->spans; rp < ep; rp += 2)
1203e12c5d1SDavid du Colombier if(r >= rp[0] && r <= rp[1])
1213e12c5d1SDavid du Colombier break;
1223e12c5d1SDavid du Colombier if(rp == ep)
1237dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
1243e12c5d1SDavid du Colombier return -1;
1253e12c5d1SDavid du Colombier break;
1263e12c5d1SDavid du Colombier case OR:
1273e12c5d1SDavid du Colombier /* evaluate right choice later */
128*e7f22645SDavid du Colombier if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
1293e12c5d1SDavid du Colombier return -1;
1303e12c5d1SDavid du Colombier /* efficiency: advance and re-evaluate */
1313e12c5d1SDavid du Colombier continue;
1323e12c5d1SDavid du Colombier case END: /* Match! */
1333e12c5d1SDavid du Colombier match = 1;
1343e12c5d1SDavid du Colombier tlp->se.m[0].rep = s;
1353e12c5d1SDavid du Colombier if(mp != 0)
1363e12c5d1SDavid du Colombier _renewmatch(mp, ms, &tlp->se);
1373e12c5d1SDavid du Colombier break;
1383e12c5d1SDavid du Colombier }
1393e12c5d1SDavid du Colombier break;
1403e12c5d1SDavid du Colombier }
1413e12c5d1SDavid du Colombier }
1427dd7cddfSDavid du Colombier if(s == j->reol)
1437dd7cddfSDavid du Colombier break;
144219b2ee8SDavid du Colombier checkstart = j->startchar && nl->inst==0;
1453e12c5d1SDavid du Colombier s++;
1463e12c5d1SDavid du Colombier }while(r);
1473e12c5d1SDavid du Colombier return match;
1483e12c5d1SDavid du Colombier }
1493e12c5d1SDavid du Colombier
150219b2ee8SDavid du Colombier static int
rregexec2(Reprog * progp,Rune * bol,Resub * mp,int ms,Reljunk * j)151219b2ee8SDavid du Colombier rregexec2(Reprog *progp, /* program to run */
152219b2ee8SDavid du Colombier Rune *bol, /* string to run machine on */
153219b2ee8SDavid du Colombier Resub *mp, /* subexpression elements */
154219b2ee8SDavid du Colombier int ms, /* number of elements at mp */
155219b2ee8SDavid du Colombier Reljunk *j
156219b2ee8SDavid du Colombier )
157219b2ee8SDavid du Colombier {
158219b2ee8SDavid du Colombier Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
159219b2ee8SDavid du Colombier
160219b2ee8SDavid du Colombier /* mark space */
161219b2ee8SDavid du Colombier j->relist[0] = relist0;
162219b2ee8SDavid du Colombier j->relist[1] = relist1;
163219b2ee8SDavid du Colombier j->reliste[0] = relist0 + nelem(relist0) - 2;
164219b2ee8SDavid du Colombier j->reliste[1] = relist1 + nelem(relist1) - 2;
165219b2ee8SDavid du Colombier
166219b2ee8SDavid du Colombier return rregexec1(progp, bol, mp, ms, j);
167219b2ee8SDavid du Colombier }
168219b2ee8SDavid du Colombier
1693e12c5d1SDavid du Colombier extern int
rregexec(Reprog * progp,Rune * bol,Resub * mp,int ms)1703e12c5d1SDavid du Colombier rregexec(Reprog *progp, /* program to run */
1713e12c5d1SDavid du Colombier Rune *bol, /* string to run machine on */
1723e12c5d1SDavid du Colombier Resub *mp, /* subexpression elements */
1733e12c5d1SDavid du Colombier int ms) /* number of elements at mp */
1743e12c5d1SDavid du Colombier {
175219b2ee8SDavid du Colombier Reljunk j;
176219b2ee8SDavid du Colombier Relist relist0[LISTSIZE], relist1[LISTSIZE];
1773e12c5d1SDavid du Colombier int rv;
1783e12c5d1SDavid du Colombier
1793e12c5d1SDavid du Colombier /*
1803e12c5d1SDavid du Colombier * use user-specified starting/ending location if specified
1813e12c5d1SDavid du Colombier */
182219b2ee8SDavid du Colombier j.rstarts = bol;
183219b2ee8SDavid du Colombier j.reol = 0;
1843e12c5d1SDavid du Colombier if(mp && ms>0){
185219b2ee8SDavid du Colombier if(mp->sp)
186219b2ee8SDavid du Colombier j.rstarts = mp->rsp;
187219b2ee8SDavid du Colombier if(mp->ep)
188219b2ee8SDavid du Colombier j.reol = mp->rep;
1893e12c5d1SDavid du Colombier }
190219b2ee8SDavid du Colombier j.starttype = 0;
191219b2ee8SDavid du Colombier j.startchar = 0;
192219b2ee8SDavid du Colombier if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
193219b2ee8SDavid du Colombier j.starttype = RUNE;
194219b2ee8SDavid du Colombier j.startchar = progp->startinst->r;
195219b2ee8SDavid du Colombier }
196219b2ee8SDavid du Colombier if(progp->startinst->type == BOL)
197219b2ee8SDavid du Colombier j.starttype = BOL;
1983e12c5d1SDavid du Colombier
199219b2ee8SDavid du Colombier /* mark space */
200219b2ee8SDavid du Colombier j.relist[0] = relist0;
201219b2ee8SDavid du Colombier j.relist[1] = relist1;
202219b2ee8SDavid du Colombier j.reliste[0] = relist0 + nelem(relist0) - 2;
203219b2ee8SDavid du Colombier j.reliste[1] = relist1 + nelem(relist1) - 2;
204219b2ee8SDavid du Colombier
205219b2ee8SDavid du Colombier rv = rregexec1(progp, bol, mp, ms, &j);
2063e12c5d1SDavid du Colombier if(rv >= 0)
2073e12c5d1SDavid du Colombier return rv;
208219b2ee8SDavid du Colombier rv = rregexec2(progp, bol, mp, ms, &j);
209219b2ee8SDavid du Colombier if(rv >= 0)
210219b2ee8SDavid du Colombier return rv;
2113e12c5d1SDavid du Colombier return -1;
2123e12c5d1SDavid du Colombier }
213