13e12c5d1SDavid du Colombier #include <u.h> 23e12c5d1SDavid du Colombier #include <libc.h> 33e12c5d1SDavid du Colombier #include "regexp.h" 43e12c5d1SDavid du Colombier #include "regcomp.h" 53e12c5d1SDavid du Colombier 63e12c5d1SDavid du Colombier 73e12c5d1SDavid du Colombier /* 83e12c5d1SDavid du Colombier * return 0 if no match 93e12c5d1SDavid du Colombier * >0 if a match 103e12c5d1SDavid du Colombier * <0 if we ran out of _relist space 113e12c5d1SDavid du Colombier */ 123e12c5d1SDavid du Colombier static int 133e12c5d1SDavid du Colombier regexec1(Reprog *progp, /* program to run */ 143e12c5d1SDavid du Colombier char *bol, /* string to run machine on */ 153e12c5d1SDavid du Colombier Resub *mp, /* subexpression elements */ 163e12c5d1SDavid du Colombier int ms, /* number of elements at mp */ 17219b2ee8SDavid du Colombier Reljunk *j 18219b2ee8SDavid du Colombier ) 193e12c5d1SDavid du Colombier { 203e12c5d1SDavid du Colombier int flag=0; 213e12c5d1SDavid du Colombier Reinst *inst; 223e12c5d1SDavid du Colombier Relist *tlp; 233e12c5d1SDavid du Colombier char *s; 243e12c5d1SDavid du Colombier int i, checkstart; 253e12c5d1SDavid du Colombier Rune r, *rp, *ep; 263e12c5d1SDavid du Colombier int n; 273e12c5d1SDavid du Colombier Relist* tl; /* This list, next list */ 283e12c5d1SDavid du Colombier Relist* nl; 293e12c5d1SDavid du Colombier Relist* tle; /* ends of this and next list */ 303e12c5d1SDavid du Colombier Relist* nle; 313e12c5d1SDavid du Colombier int match; 323e12c5d1SDavid du Colombier char *p; 333e12c5d1SDavid du Colombier 343e12c5d1SDavid du Colombier match = 0; 35219b2ee8SDavid du Colombier checkstart = j->starttype; 36219b2ee8SDavid du Colombier if(mp) 37219b2ee8SDavid du Colombier for(i=0; i<ms; i++) { 38219b2ee8SDavid du Colombier mp[i].sp = 0; 39219b2ee8SDavid du Colombier mp[i].ep = 0; 40219b2ee8SDavid du Colombier } 41219b2ee8SDavid du Colombier j->relist[0][0].inst = 0; 42219b2ee8SDavid du Colombier j->relist[1][0].inst = 0; 433e12c5d1SDavid du Colombier 443e12c5d1SDavid du Colombier /* Execute machine once for each character, including terminal NUL */ 45219b2ee8SDavid du Colombier s = j->starts; 463e12c5d1SDavid du Colombier do{ 473e12c5d1SDavid du Colombier /* fast check for first char */ 483e12c5d1SDavid du Colombier if(checkstart) { 49219b2ee8SDavid du Colombier switch(j->starttype) { 50219b2ee8SDavid du Colombier case RUNE: 51219b2ee8SDavid du Colombier p = utfrune(s, j->startchar); 52e34dbc32SDavid du Colombier if(p == 0 || s == j->eol) 533e12c5d1SDavid du Colombier return match; 543e12c5d1SDavid du Colombier s = p; 55219b2ee8SDavid du Colombier break; 56219b2ee8SDavid du Colombier case BOL: 57219b2ee8SDavid du Colombier if(s == bol) 58219b2ee8SDavid du Colombier break; 59219b2ee8SDavid du Colombier p = utfrune(s, '\n'); 60e34dbc32SDavid du Colombier if(p == 0 || s == j->eol) 61219b2ee8SDavid du Colombier return match; 62*7366567fSDavid du Colombier s = p+1; 63219b2ee8SDavid du Colombier break; 64219b2ee8SDavid du Colombier } 653e12c5d1SDavid du Colombier } 663e12c5d1SDavid du Colombier r = *(uchar*)s; 673e12c5d1SDavid du Colombier if(r < Runeself) 683e12c5d1SDavid du Colombier n = 1; 693e12c5d1SDavid du Colombier else 703e12c5d1SDavid du Colombier n = chartorune(&r, s); 713e12c5d1SDavid du Colombier 723e12c5d1SDavid du Colombier /* switch run lists */ 73219b2ee8SDavid du Colombier tl = j->relist[flag]; 74219b2ee8SDavid du Colombier tle = j->reliste[flag]; 75219b2ee8SDavid du Colombier nl = j->relist[flag^=1]; 76219b2ee8SDavid du Colombier nle = j->reliste[flag]; 773e12c5d1SDavid du Colombier nl->inst = 0; 783e12c5d1SDavid du Colombier 793e12c5d1SDavid du Colombier /* Add first instruction to current list */ 80219b2ee8SDavid du Colombier if(match == 0) 817dd7cddfSDavid du Colombier _renewemptythread(tl, progp->startinst, ms, s); 823e12c5d1SDavid du Colombier 833e12c5d1SDavid du Colombier /* Execute machine until current list is empty */ 843e12c5d1SDavid du Colombier for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ 853e12c5d1SDavid du Colombier for(inst = tlp->inst; ; inst = inst->next){ 863e12c5d1SDavid du Colombier switch(inst->type){ 873e12c5d1SDavid du Colombier case RUNE: /* regular character */ 88219b2ee8SDavid du Colombier if(inst->r == r){ 897dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 903e12c5d1SDavid du Colombier return -1; 91219b2ee8SDavid du Colombier } 923e12c5d1SDavid du Colombier break; 933e12c5d1SDavid du Colombier case LBRA: 943e12c5d1SDavid du Colombier tlp->se.m[inst->subid].sp = s; 953e12c5d1SDavid du Colombier continue; 963e12c5d1SDavid du Colombier case RBRA: 973e12c5d1SDavid du Colombier tlp->se.m[inst->subid].ep = s; 983e12c5d1SDavid du Colombier continue; 993e12c5d1SDavid du Colombier case ANY: 1003e12c5d1SDavid du Colombier if(r != '\n') 1017dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 1023e12c5d1SDavid du Colombier return -1; 1033e12c5d1SDavid du Colombier break; 1043e12c5d1SDavid du Colombier case ANYNL: 1057dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 1063e12c5d1SDavid du Colombier return -1; 1073e12c5d1SDavid du Colombier break; 1083e12c5d1SDavid du Colombier case BOL: 1093e12c5d1SDavid du Colombier if(s == bol || *(s-1) == '\n') 1103e12c5d1SDavid du Colombier continue; 1113e12c5d1SDavid du Colombier break; 1123e12c5d1SDavid du Colombier case EOL: 1137dd7cddfSDavid du Colombier if(s == j->eol || r == 0 || r == '\n') 1143e12c5d1SDavid du Colombier continue; 1153e12c5d1SDavid du Colombier break; 1163e12c5d1SDavid du Colombier case CCLASS: 1173e12c5d1SDavid du Colombier ep = inst->cp->end; 1183e12c5d1SDavid du Colombier for(rp = inst->cp->spans; rp < ep; rp += 2) 1193e12c5d1SDavid du Colombier if(r >= rp[0] && r <= rp[1]){ 1207dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 1213e12c5d1SDavid du Colombier return -1; 1223e12c5d1SDavid du Colombier break; 1233e12c5d1SDavid du Colombier } 1243e12c5d1SDavid du Colombier break; 1253e12c5d1SDavid du Colombier case NCCLASS: 1263e12c5d1SDavid du Colombier ep = inst->cp->end; 1273e12c5d1SDavid du Colombier for(rp = inst->cp->spans; rp < ep; rp += 2) 1283e12c5d1SDavid du Colombier if(r >= rp[0] && r <= rp[1]) 1293e12c5d1SDavid du Colombier break; 1303e12c5d1SDavid du Colombier if(rp == ep) 1317dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 1323e12c5d1SDavid du Colombier return -1; 1333e12c5d1SDavid du Colombier break; 1343e12c5d1SDavid du Colombier case OR: 1353e12c5d1SDavid du Colombier /* evaluate right choice later */ 1367dd7cddfSDavid du Colombier if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle) 1373e12c5d1SDavid du Colombier return -1; 1383e12c5d1SDavid du Colombier /* efficiency: advance and re-evaluate */ 1393e12c5d1SDavid du Colombier continue; 1403e12c5d1SDavid du Colombier case END: /* Match! */ 1413e12c5d1SDavid du Colombier match = 1; 1423e12c5d1SDavid du Colombier tlp->se.m[0].ep = s; 1433e12c5d1SDavid du Colombier if(mp != 0) 1443e12c5d1SDavid du Colombier _renewmatch(mp, ms, &tlp->se); 1453e12c5d1SDavid du Colombier break; 1463e12c5d1SDavid du Colombier } 1473e12c5d1SDavid du Colombier break; 1483e12c5d1SDavid du Colombier } 1493e12c5d1SDavid du Colombier } 1507dd7cddfSDavid du Colombier if(s == j->eol) 1517dd7cddfSDavid du Colombier break; 152219b2ee8SDavid du Colombier checkstart = j->starttype && nl->inst==0; 1533e12c5d1SDavid du Colombier s += n; 1543e12c5d1SDavid du Colombier }while(r); 1553e12c5d1SDavid du Colombier return match; 1563e12c5d1SDavid du Colombier } 1573e12c5d1SDavid du Colombier 158219b2ee8SDavid du Colombier static int 159219b2ee8SDavid du Colombier regexec2(Reprog *progp, /* program to run */ 160219b2ee8SDavid du Colombier char *bol, /* string to run machine on */ 161219b2ee8SDavid du Colombier Resub *mp, /* subexpression elements */ 162219b2ee8SDavid du Colombier int ms, /* number of elements at mp */ 163219b2ee8SDavid du Colombier Reljunk *j 164219b2ee8SDavid du Colombier ) 165219b2ee8SDavid du Colombier { 1667dd7cddfSDavid du Colombier int rv; 1677dd7cddfSDavid du Colombier Relist *relist0, *relist1; 168219b2ee8SDavid du Colombier 169219b2ee8SDavid du Colombier /* mark space */ 1707dd7cddfSDavid du Colombier relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); 1717dd7cddfSDavid du Colombier if(relist0 == nil) 1727dd7cddfSDavid du Colombier return -1; 1737dd7cddfSDavid du Colombier relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); 1747dd7cddfSDavid du Colombier if(relist1 == nil){ 1757dd7cddfSDavid du Colombier free(relist1); 1767dd7cddfSDavid du Colombier return -1; 1777dd7cddfSDavid du Colombier } 178219b2ee8SDavid du Colombier j->relist[0] = relist0; 179219b2ee8SDavid du Colombier j->relist[1] = relist1; 1807dd7cddfSDavid du Colombier j->reliste[0] = relist0 + BIGLISTSIZE - 2; 1817dd7cddfSDavid du Colombier j->reliste[1] = relist1 + BIGLISTSIZE - 2; 182219b2ee8SDavid du Colombier 1837dd7cddfSDavid du Colombier rv = regexec1(progp, bol, mp, ms, j); 1847dd7cddfSDavid du Colombier free(relist0); 1857dd7cddfSDavid du Colombier free(relist1); 1867dd7cddfSDavid du Colombier return rv; 187219b2ee8SDavid du Colombier } 188219b2ee8SDavid du Colombier 1893e12c5d1SDavid du Colombier extern int 1903e12c5d1SDavid du Colombier regexec(Reprog *progp, /* program to run */ 1913e12c5d1SDavid du Colombier char *bol, /* string to run machine on */ 1923e12c5d1SDavid du Colombier Resub *mp, /* subexpression elements */ 1933e12c5d1SDavid du Colombier int ms) /* number of elements at mp */ 1943e12c5d1SDavid du Colombier { 195219b2ee8SDavid du Colombier Reljunk j; 196219b2ee8SDavid du Colombier Relist relist0[LISTSIZE], relist1[LISTSIZE]; 1973e12c5d1SDavid du Colombier int rv; 1983e12c5d1SDavid du Colombier 1993e12c5d1SDavid du Colombier /* 2003e12c5d1SDavid du Colombier * use user-specified starting/ending location if specified 2013e12c5d1SDavid du Colombier */ 202219b2ee8SDavid du Colombier j.starts = bol; 203219b2ee8SDavid du Colombier j.eol = 0; 2043e12c5d1SDavid du Colombier if(mp && ms>0){ 2053e12c5d1SDavid du Colombier if(mp->sp) 206219b2ee8SDavid du Colombier j.starts = mp->sp; 2073e12c5d1SDavid du Colombier if(mp->ep) 208219b2ee8SDavid du Colombier j.eol = mp->ep; 2093e12c5d1SDavid du Colombier } 210219b2ee8SDavid du Colombier j.starttype = 0; 211219b2ee8SDavid du Colombier j.startchar = 0; 212219b2ee8SDavid du Colombier if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) { 213219b2ee8SDavid du Colombier j.starttype = RUNE; 214219b2ee8SDavid du Colombier j.startchar = progp->startinst->r; 215219b2ee8SDavid du Colombier } 216219b2ee8SDavid du Colombier if(progp->startinst->type == BOL) 217219b2ee8SDavid du Colombier j.starttype = BOL; 2183e12c5d1SDavid du Colombier 219219b2ee8SDavid du Colombier /* mark space */ 220219b2ee8SDavid du Colombier j.relist[0] = relist0; 221219b2ee8SDavid du Colombier j.relist[1] = relist1; 222219b2ee8SDavid du Colombier j.reliste[0] = relist0 + nelem(relist0) - 2; 223219b2ee8SDavid du Colombier j.reliste[1] = relist1 + nelem(relist1) - 2; 224219b2ee8SDavid du Colombier 225219b2ee8SDavid du Colombier rv = regexec1(progp, bol, mp, ms, &j); 2263e12c5d1SDavid du Colombier if(rv >= 0) 2273e12c5d1SDavid du Colombier return rv; 228219b2ee8SDavid du Colombier rv = regexec2(progp, bol, mp, ms, &j); 229219b2ee8SDavid du Colombier if(rv >= 0) 230219b2ee8SDavid du Colombier return rv; 2313e12c5d1SDavid du Colombier return -1; 2323e12c5d1SDavid du Colombier } 233