1*3e12c5d1SDavid du Colombier #include <u.h> 2*3e12c5d1SDavid du Colombier #include <libc.h> 3*3e12c5d1SDavid du Colombier #include "regexp.h" 4*3e12c5d1SDavid du Colombier #include "regcomp.h" 5*3e12c5d1SDavid du Colombier 6*3e12c5d1SDavid du Colombier static Resublist sempty; /* empty set of matches */ 7*3e12c5d1SDavid du Colombier 8*3e12c5d1SDavid du Colombier /* 9*3e12c5d1SDavid du Colombier * return 0 if no match 10*3e12c5d1SDavid du Colombier * >0 if a match 11*3e12c5d1SDavid du Colombier * <0 if we ran out of _relist space 12*3e12c5d1SDavid du Colombier */ 13*3e12c5d1SDavid du Colombier static int 14*3e12c5d1SDavid du Colombier regexec1(Reprog *progp, /* program to run */ 15*3e12c5d1SDavid du Colombier char *bol, /* string to run machine on */ 16*3e12c5d1SDavid du Colombier Resub *mp, /* subexpression elements */ 17*3e12c5d1SDavid du Colombier int ms, /* number of elements at mp */ 18*3e12c5d1SDavid du Colombier char *starts, 19*3e12c5d1SDavid du Colombier char *eol, 20*3e12c5d1SDavid du Colombier Rune startchar) 21*3e12c5d1SDavid du Colombier { 22*3e12c5d1SDavid du Colombier int flag=0; 23*3e12c5d1SDavid du Colombier Reinst *inst; 24*3e12c5d1SDavid du Colombier Relist *tlp; 25*3e12c5d1SDavid du Colombier char *s; 26*3e12c5d1SDavid du Colombier int i, checkstart; 27*3e12c5d1SDavid du Colombier Rune r, *rp, *ep; 28*3e12c5d1SDavid du Colombier int n; 29*3e12c5d1SDavid du Colombier Relist* tl; /* This list, next list */ 30*3e12c5d1SDavid du Colombier Relist* nl; 31*3e12c5d1SDavid du Colombier Relist* tle; /* ends of this and next list */ 32*3e12c5d1SDavid du Colombier Relist* nle; 33*3e12c5d1SDavid du Colombier int match; 34*3e12c5d1SDavid du Colombier char *p; 35*3e12c5d1SDavid du Colombier 36*3e12c5d1SDavid du Colombier match = 0; 37*3e12c5d1SDavid du Colombier checkstart = startchar; 38*3e12c5d1SDavid du Colombier sempty.m[0].sp = 0; 39*3e12c5d1SDavid du Colombier if(mp!=0) 40*3e12c5d1SDavid du Colombier for(i=0; i<ms; i++) 41*3e12c5d1SDavid du Colombier mp[i].sp = mp[i].ep = 0; 42*3e12c5d1SDavid du Colombier _relist[0][0].inst = _relist[1][0].inst = 0; 43*3e12c5d1SDavid du Colombier 44*3e12c5d1SDavid du Colombier /* Execute machine once for each character, including terminal NUL */ 45*3e12c5d1SDavid du Colombier s = starts; 46*3e12c5d1SDavid du Colombier do{ 47*3e12c5d1SDavid du Colombier /* fast check for first char */ 48*3e12c5d1SDavid du Colombier if(checkstart){ 49*3e12c5d1SDavid du Colombier p = utfrune(s, startchar); 50*3e12c5d1SDavid du Colombier if(p == 0) 51*3e12c5d1SDavid du Colombier return match; 52*3e12c5d1SDavid du Colombier s = p; 53*3e12c5d1SDavid du Colombier } 54*3e12c5d1SDavid du Colombier r = *(uchar*)s; 55*3e12c5d1SDavid du Colombier if(r < Runeself) 56*3e12c5d1SDavid du Colombier n = 1; 57*3e12c5d1SDavid du Colombier else 58*3e12c5d1SDavid du Colombier n = chartorune(&r, s); 59*3e12c5d1SDavid du Colombier 60*3e12c5d1SDavid du Colombier /* switch run lists */ 61*3e12c5d1SDavid du Colombier tl = _relist[flag]; 62*3e12c5d1SDavid du Colombier tle = _reliste[flag]; 63*3e12c5d1SDavid du Colombier nl = _relist[flag^=1]; 64*3e12c5d1SDavid du Colombier nle = _reliste[flag]; 65*3e12c5d1SDavid du Colombier nl->inst = 0; 66*3e12c5d1SDavid du Colombier 67*3e12c5d1SDavid du Colombier /* Add first instruction to current list */ 68*3e12c5d1SDavid du Colombier if(match == 0){ 69*3e12c5d1SDavid du Colombier sempty.m[0].sp = s; 70*3e12c5d1SDavid du Colombier _renewthread(tl, progp->startinst, &sempty); 71*3e12c5d1SDavid du Colombier } 72*3e12c5d1SDavid du Colombier 73*3e12c5d1SDavid du Colombier /* Execute machine until current list is empty */ 74*3e12c5d1SDavid du Colombier for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ 75*3e12c5d1SDavid du Colombier if(s == eol) 76*3e12c5d1SDavid du Colombier break; 77*3e12c5d1SDavid du Colombier 78*3e12c5d1SDavid du Colombier for(inst = tlp->inst; ; inst = inst->next){ 79*3e12c5d1SDavid du Colombier switch(inst->type){ 80*3e12c5d1SDavid du Colombier case RUNE: /* regular character */ 81*3e12c5d1SDavid du Colombier if(inst->r == r) 82*3e12c5d1SDavid du Colombier if(_renewthread(nl, inst->next, &tlp->se)==nle) 83*3e12c5d1SDavid du Colombier return -1; 84*3e12c5d1SDavid du Colombier break; 85*3e12c5d1SDavid du Colombier case LBRA: 86*3e12c5d1SDavid du Colombier tlp->se.m[inst->subid].sp = s; 87*3e12c5d1SDavid du Colombier continue; 88*3e12c5d1SDavid du Colombier case RBRA: 89*3e12c5d1SDavid du Colombier tlp->se.m[inst->subid].ep = s; 90*3e12c5d1SDavid du Colombier continue; 91*3e12c5d1SDavid du Colombier case ANY: 92*3e12c5d1SDavid du Colombier if(r != '\n') 93*3e12c5d1SDavid du Colombier if(_renewthread(nl, inst->next, &tlp->se)==nle) 94*3e12c5d1SDavid du Colombier return -1; 95*3e12c5d1SDavid du Colombier break; 96*3e12c5d1SDavid du Colombier case ANYNL: 97*3e12c5d1SDavid du Colombier if(_renewthread(nl, inst->next, &tlp->se)==nle) 98*3e12c5d1SDavid du Colombier return -1; 99*3e12c5d1SDavid du Colombier break; 100*3e12c5d1SDavid du Colombier case BOL: 101*3e12c5d1SDavid du Colombier if(s == bol || *(s-1) == '\n') 102*3e12c5d1SDavid du Colombier continue; 103*3e12c5d1SDavid du Colombier break; 104*3e12c5d1SDavid du Colombier case EOL: 105*3e12c5d1SDavid du Colombier if(r == 0 || r == '\n') 106*3e12c5d1SDavid du Colombier continue; 107*3e12c5d1SDavid du Colombier break; 108*3e12c5d1SDavid du Colombier case CCLASS: 109*3e12c5d1SDavid du Colombier ep = inst->cp->end; 110*3e12c5d1SDavid du Colombier for(rp = inst->cp->spans; rp < ep; rp += 2) 111*3e12c5d1SDavid du Colombier if(r >= rp[0] && r <= rp[1]){ 112*3e12c5d1SDavid du Colombier if(_renewthread(nl, inst->next, &tlp->se)==nle) 113*3e12c5d1SDavid du Colombier return -1; 114*3e12c5d1SDavid du Colombier break; 115*3e12c5d1SDavid du Colombier } 116*3e12c5d1SDavid du Colombier break; 117*3e12c5d1SDavid du Colombier case NCCLASS: 118*3e12c5d1SDavid du Colombier ep = inst->cp->end; 119*3e12c5d1SDavid du Colombier for(rp = inst->cp->spans; rp < ep; rp += 2) 120*3e12c5d1SDavid du Colombier if(r >= rp[0] && r <= rp[1]) 121*3e12c5d1SDavid du Colombier break; 122*3e12c5d1SDavid du Colombier if(rp == ep) 123*3e12c5d1SDavid du Colombier if(_renewthread(nl, inst->next, &tlp->se)==nle) 124*3e12c5d1SDavid du Colombier return -1; 125*3e12c5d1SDavid du Colombier break; 126*3e12c5d1SDavid du Colombier case OR: 127*3e12c5d1SDavid du Colombier /* evaluate right choice later */ 128*3e12c5d1SDavid du Colombier if(_renewthread(tlp, inst->right, &tlp->se) == tle) 129*3e12c5d1SDavid du Colombier return -1; 130*3e12c5d1SDavid du Colombier /* efficiency: advance and re-evaluate */ 131*3e12c5d1SDavid du Colombier continue; 132*3e12c5d1SDavid du Colombier case END: /* Match! */ 133*3e12c5d1SDavid du Colombier match = 1; 134*3e12c5d1SDavid du Colombier tlp->se.m[0].ep = s; 135*3e12c5d1SDavid du Colombier if(mp != 0) 136*3e12c5d1SDavid du Colombier _renewmatch(mp, ms, &tlp->se); 137*3e12c5d1SDavid du Colombier break; 138*3e12c5d1SDavid du Colombier } 139*3e12c5d1SDavid du Colombier break; 140*3e12c5d1SDavid du Colombier } 141*3e12c5d1SDavid du Colombier } 142*3e12c5d1SDavid du Colombier checkstart = startchar && nl->inst==0; 143*3e12c5d1SDavid du Colombier s += n; 144*3e12c5d1SDavid du Colombier }while(r); 145*3e12c5d1SDavid du Colombier return match; 146*3e12c5d1SDavid du Colombier } 147*3e12c5d1SDavid du Colombier 148*3e12c5d1SDavid du Colombier extern int 149*3e12c5d1SDavid du Colombier regexec(Reprog *progp, /* program to run */ 150*3e12c5d1SDavid du Colombier char *bol, /* string to run machine on */ 151*3e12c5d1SDavid du Colombier Resub *mp, /* subexpression elements */ 152*3e12c5d1SDavid du Colombier int ms) /* number of elements at mp */ 153*3e12c5d1SDavid du Colombier { 154*3e12c5d1SDavid du Colombier char *starts; /* where to start match */ 155*3e12c5d1SDavid du Colombier char *eol; /* where to end match */ 156*3e12c5d1SDavid du Colombier Rune startchar; 157*3e12c5d1SDavid du Colombier int rv; 158*3e12c5d1SDavid du Colombier 159*3e12c5d1SDavid du Colombier /* 160*3e12c5d1SDavid du Colombier * use user-specified starting/ending location if specified 161*3e12c5d1SDavid du Colombier */ 162*3e12c5d1SDavid du Colombier starts = bol; 163*3e12c5d1SDavid du Colombier eol = 0; 164*3e12c5d1SDavid du Colombier if(mp && ms>0){ 165*3e12c5d1SDavid du Colombier if(mp->sp) 166*3e12c5d1SDavid du Colombier starts = mp->sp; 167*3e12c5d1SDavid du Colombier if(mp->ep) 168*3e12c5d1SDavid du Colombier eol = mp->ep; 169*3e12c5d1SDavid du Colombier } 170*3e12c5d1SDavid du Colombier startchar = (progp->startinst->type == RUNE && progp->startinst->r < Runeself) 171*3e12c5d1SDavid du Colombier ? progp->startinst->r : 0; 172*3e12c5d1SDavid du Colombier 173*3e12c5d1SDavid du Colombier /* keep trying till we have enough list space to terminate */ 174*3e12c5d1SDavid du Colombier for(;;){ 175*3e12c5d1SDavid du Colombier if(_relist[0] == 0){ 176*3e12c5d1SDavid du Colombier _relist[0] = malloc(2*_relistsize*sizeof(Relist)); 177*3e12c5d1SDavid du Colombier _relist[1] = _relist[0] + _relistsize; 178*3e12c5d1SDavid du Colombier _reliste[0] = _relist[0] + _relistsize - 1; 179*3e12c5d1SDavid du Colombier _reliste[1] = _relist[1] + _relistsize - 1; 180*3e12c5d1SDavid du Colombier if(_relist[0] == 0) 181*3e12c5d1SDavid du Colombier regerror("_relist overflow"); 182*3e12c5d1SDavid du Colombier } 183*3e12c5d1SDavid du Colombier rv = regexec1(progp, bol, mp, ms, starts, eol, startchar); 184*3e12c5d1SDavid du Colombier if(rv >= 0) 185*3e12c5d1SDavid du Colombier return rv; 186*3e12c5d1SDavid du Colombier free(_relist[0]); 187*3e12c5d1SDavid du Colombier _relist[0] = 0; 188*3e12c5d1SDavid du Colombier _relistsize += LISTINCREMENT; 189*3e12c5d1SDavid du Colombier } 190*3e12c5d1SDavid du Colombier return -1; 191*3e12c5d1SDavid du Colombier } 192