13e12c5d1SDavid du Colombier #include <u.h> 23e12c5d1SDavid du Colombier #include <libc.h> 33e12c5d1SDavid du Colombier #include "regexp.h" 43e12c5d1SDavid du Colombier #include "regcomp.h" 53e12c5d1SDavid du Colombier 63e12c5d1SDavid du Colombier /* 73e12c5d1SDavid du Colombier * return 0 if no match 83e12c5d1SDavid du Colombier * >0 if a match 93e12c5d1SDavid du Colombier * <0 if we ran out of _relist space 103e12c5d1SDavid du Colombier */ 113e12c5d1SDavid du Colombier static int 123e12c5d1SDavid du Colombier rregexec1(Reprog *progp, /* program to run */ 133e12c5d1SDavid du Colombier Rune *bol, /* string to run machine on */ 143e12c5d1SDavid du Colombier Resub *mp, /* subexpression elements */ 153e12c5d1SDavid du Colombier int ms, /* number of elements at mp */ 16219b2ee8SDavid du Colombier Reljunk *j) 173e12c5d1SDavid du Colombier { 183e12c5d1SDavid du Colombier int flag=0; 193e12c5d1SDavid du Colombier Reinst *inst; 203e12c5d1SDavid du Colombier Relist *tlp; 213e12c5d1SDavid du Colombier Rune *s; 223e12c5d1SDavid du Colombier int i, checkstart; 233e12c5d1SDavid du Colombier Rune r, *rp, *ep; 243e12c5d1SDavid du Colombier Relist* tl; /* This list, next list */ 253e12c5d1SDavid du Colombier Relist* nl; 263e12c5d1SDavid du Colombier Relist* tle; /* ends of this and next list */ 273e12c5d1SDavid du Colombier Relist* nle; 283e12c5d1SDavid du Colombier int match; 293e12c5d1SDavid du Colombier 303e12c5d1SDavid du Colombier match = 0; 31219b2ee8SDavid du Colombier checkstart = j->startchar; 32219b2ee8SDavid du Colombier if(mp) 33219b2ee8SDavid du Colombier for(i=0; i<ms; i++) { 34219b2ee8SDavid du Colombier mp[i].rsp = 0; 35219b2ee8SDavid du Colombier mp[i].rep = 0; 36219b2ee8SDavid du Colombier } 37219b2ee8SDavid du Colombier j->relist[0][0].inst = 0; 38219b2ee8SDavid du Colombier j->relist[1][0].inst = 0; 393e12c5d1SDavid du Colombier 403e12c5d1SDavid du Colombier /* Execute machine once for each character, including terminal NUL */ 41219b2ee8SDavid du Colombier s = j->rstarts; 423e12c5d1SDavid du Colombier do{ 433e12c5d1SDavid du Colombier 443e12c5d1SDavid du Colombier /* fast check for first char */ 45219b2ee8SDavid du Colombier if(checkstart) { 46219b2ee8SDavid du Colombier switch(j->starttype) { 47219b2ee8SDavid du Colombier case RUNE: 48219b2ee8SDavid du Colombier while(*s != j->startchar) { 49219b2ee8SDavid du Colombier if(*s == 0) 50219b2ee8SDavid du Colombier return match; 513e12c5d1SDavid du Colombier s++; 52219b2ee8SDavid du Colombier } 53219b2ee8SDavid du Colombier break; 54219b2ee8SDavid du Colombier case BOL: 55219b2ee8SDavid du Colombier if(s == bol) 56219b2ee8SDavid du Colombier break; 57219b2ee8SDavid du Colombier while(*s != '\n') { 58219b2ee8SDavid du Colombier if(*s == 0) 59219b2ee8SDavid du Colombier return match; 60219b2ee8SDavid du Colombier s++; 61219b2ee8SDavid du Colombier } 62219b2ee8SDavid du Colombier break; 63219b2ee8SDavid du Colombier } 643e12c5d1SDavid du Colombier } 653e12c5d1SDavid du Colombier 66219b2ee8SDavid du Colombier r = *s; 67219b2ee8SDavid du Colombier 683e12c5d1SDavid du Colombier /* switch run lists */ 69219b2ee8SDavid du Colombier tl = j->relist[flag]; 70219b2ee8SDavid du Colombier tle = j->reliste[flag]; 71219b2ee8SDavid du Colombier nl = j->relist[flag^=1]; 72219b2ee8SDavid du Colombier nle = j->reliste[flag]; 733e12c5d1SDavid du Colombier nl->inst = 0; 743e12c5d1SDavid du Colombier 753e12c5d1SDavid du Colombier /* Add first instruction to current list */ 76*7dd7cddfSDavid du Colombier _rrenewemptythread(tl, progp->startinst, ms, s); 773e12c5d1SDavid du Colombier 783e12c5d1SDavid du Colombier /* Execute machine until current list is empty */ 79219b2ee8SDavid du Colombier for(tlp=tl; tlp->inst; tlp++){ 803e12c5d1SDavid du Colombier for(inst=tlp->inst; ; inst = inst->next){ 813e12c5d1SDavid du Colombier switch(inst->type){ 823e12c5d1SDavid du Colombier case RUNE: /* regular character */ 833e12c5d1SDavid du Colombier if(inst->r == r) 84*7dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 853e12c5d1SDavid du Colombier return -1; 863e12c5d1SDavid du Colombier break; 873e12c5d1SDavid du Colombier case LBRA: 883e12c5d1SDavid du Colombier tlp->se.m[inst->subid].rsp = s; 893e12c5d1SDavid du Colombier continue; 903e12c5d1SDavid du Colombier case RBRA: 913e12c5d1SDavid du Colombier tlp->se.m[inst->subid].rep = s; 923e12c5d1SDavid du Colombier continue; 933e12c5d1SDavid du Colombier case ANY: 943e12c5d1SDavid du Colombier if(r != '\n') 95*7dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 963e12c5d1SDavid du Colombier return -1; 973e12c5d1SDavid du Colombier break; 983e12c5d1SDavid du Colombier case ANYNL: 99*7dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 1003e12c5d1SDavid du Colombier return -1; 1013e12c5d1SDavid du Colombier break; 1023e12c5d1SDavid du Colombier case BOL: 1033e12c5d1SDavid du Colombier if(s == bol || *(s-1) == '\n') 1043e12c5d1SDavid du Colombier continue; 1053e12c5d1SDavid du Colombier break; 1063e12c5d1SDavid du Colombier case EOL: 107*7dd7cddfSDavid du Colombier if(s == j->reol || r == 0 || r == '\n') 1083e12c5d1SDavid du Colombier continue; 1093e12c5d1SDavid du Colombier break; 1103e12c5d1SDavid du Colombier case CCLASS: 1113e12c5d1SDavid du Colombier ep = inst->cp->end; 1123e12c5d1SDavid du Colombier for(rp = inst->cp->spans; rp < ep; rp += 2) 1133e12c5d1SDavid du Colombier if(r >= rp[0] && r <= rp[1]){ 114*7dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 1153e12c5d1SDavid du Colombier return -1; 1163e12c5d1SDavid du Colombier break; 1173e12c5d1SDavid du Colombier } 1183e12c5d1SDavid du Colombier break; 1193e12c5d1SDavid du Colombier case NCCLASS: 1203e12c5d1SDavid du Colombier ep = inst->cp->end; 1213e12c5d1SDavid du Colombier for(rp = inst->cp->spans; rp < ep; rp += 2) 1223e12c5d1SDavid du Colombier if(r >= rp[0] && r <= rp[1]) 1233e12c5d1SDavid du Colombier break; 1243e12c5d1SDavid du Colombier if(rp == ep) 125*7dd7cddfSDavid du Colombier if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 1263e12c5d1SDavid du Colombier return -1; 1273e12c5d1SDavid du Colombier break; 1283e12c5d1SDavid du Colombier case OR: 1293e12c5d1SDavid du Colombier /* evaluate right choice later */ 130*7dd7cddfSDavid du Colombier if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle) 1313e12c5d1SDavid du Colombier return -1; 1323e12c5d1SDavid du Colombier /* efficiency: advance and re-evaluate */ 1333e12c5d1SDavid du Colombier continue; 1343e12c5d1SDavid du Colombier case END: /* Match! */ 1353e12c5d1SDavid du Colombier match = 1; 1363e12c5d1SDavid du Colombier tlp->se.m[0].rep = s; 1373e12c5d1SDavid du Colombier if(mp != 0) 1383e12c5d1SDavid du Colombier _renewmatch(mp, ms, &tlp->se); 1393e12c5d1SDavid du Colombier break; 1403e12c5d1SDavid du Colombier } 1413e12c5d1SDavid du Colombier break; 1423e12c5d1SDavid du Colombier } 1433e12c5d1SDavid du Colombier } 144*7dd7cddfSDavid du Colombier if(s == j->reol) 145*7dd7cddfSDavid du Colombier break; 146219b2ee8SDavid du Colombier checkstart = j->startchar && nl->inst==0; 1473e12c5d1SDavid du Colombier s++; 1483e12c5d1SDavid du Colombier }while(r); 1493e12c5d1SDavid du Colombier return match; 1503e12c5d1SDavid du Colombier } 1513e12c5d1SDavid du Colombier 152219b2ee8SDavid du Colombier static int 153219b2ee8SDavid du Colombier rregexec2(Reprog *progp, /* program to run */ 154219b2ee8SDavid du Colombier Rune *bol, /* string to run machine on */ 155219b2ee8SDavid du Colombier Resub *mp, /* subexpression elements */ 156219b2ee8SDavid du Colombier int ms, /* number of elements at mp */ 157219b2ee8SDavid du Colombier Reljunk *j 158219b2ee8SDavid du Colombier ) 159219b2ee8SDavid du Colombier { 160219b2ee8SDavid du Colombier Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE]; 161219b2ee8SDavid du Colombier 162219b2ee8SDavid du Colombier /* mark space */ 163219b2ee8SDavid du Colombier j->relist[0] = relist0; 164219b2ee8SDavid du Colombier j->relist[1] = relist1; 165219b2ee8SDavid du Colombier j->reliste[0] = relist0 + nelem(relist0) - 2; 166219b2ee8SDavid du Colombier j->reliste[1] = relist1 + nelem(relist1) - 2; 167219b2ee8SDavid du Colombier 168219b2ee8SDavid du Colombier return rregexec1(progp, bol, mp, ms, j); 169219b2ee8SDavid du Colombier } 170219b2ee8SDavid du Colombier 1713e12c5d1SDavid du Colombier extern int 1723e12c5d1SDavid du Colombier rregexec(Reprog *progp, /* program to run */ 1733e12c5d1SDavid du Colombier Rune *bol, /* string to run machine on */ 1743e12c5d1SDavid du Colombier Resub *mp, /* subexpression elements */ 1753e12c5d1SDavid du Colombier int ms) /* number of elements at mp */ 1763e12c5d1SDavid du Colombier { 177219b2ee8SDavid du Colombier Reljunk j; 178219b2ee8SDavid du Colombier Relist relist0[LISTSIZE], relist1[LISTSIZE]; 1793e12c5d1SDavid du Colombier int rv; 1803e12c5d1SDavid du Colombier 1813e12c5d1SDavid du Colombier /* 1823e12c5d1SDavid du Colombier * use user-specified starting/ending location if specified 1833e12c5d1SDavid du Colombier */ 184219b2ee8SDavid du Colombier j.rstarts = bol; 185219b2ee8SDavid du Colombier j.reol = 0; 1863e12c5d1SDavid du Colombier if(mp && ms>0){ 187219b2ee8SDavid du Colombier if(mp->sp) 188219b2ee8SDavid du Colombier j.rstarts = mp->rsp; 189219b2ee8SDavid du Colombier if(mp->ep) 190219b2ee8SDavid du Colombier j.reol = mp->rep; 1913e12c5d1SDavid du Colombier } 192219b2ee8SDavid du Colombier j.starttype = 0; 193219b2ee8SDavid du Colombier j.startchar = 0; 194219b2ee8SDavid du Colombier if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) { 195219b2ee8SDavid du Colombier j.starttype = RUNE; 196219b2ee8SDavid du Colombier j.startchar = progp->startinst->r; 197219b2ee8SDavid du Colombier } 198219b2ee8SDavid du Colombier if(progp->startinst->type == BOL) 199219b2ee8SDavid du Colombier j.starttype = BOL; 2003e12c5d1SDavid du Colombier 201219b2ee8SDavid du Colombier /* mark space */ 202219b2ee8SDavid du Colombier j.relist[0] = relist0; 203219b2ee8SDavid du Colombier j.relist[1] = relist1; 204219b2ee8SDavid du Colombier j.reliste[0] = relist0 + nelem(relist0) - 2; 205219b2ee8SDavid du Colombier j.reliste[1] = relist1 + nelem(relist1) - 2; 206219b2ee8SDavid du Colombier 207219b2ee8SDavid du Colombier rv = rregexec1(progp, bol, mp, ms, &j); 2083e12c5d1SDavid du Colombier if(rv >= 0) 2093e12c5d1SDavid du Colombier return rv; 210219b2ee8SDavid du Colombier rv = rregexec2(progp, bol, mp, ms, &j); 211219b2ee8SDavid du Colombier if(rv >= 0) 212219b2ee8SDavid du Colombier return rv; 2133e12c5d1SDavid du Colombier return -1; 2143e12c5d1SDavid du Colombier } 215