1 #include <u.h> 2 #include <libc.h> 3 #include "regexp.h" 4 #include "regcomp.h" 5 6 /* 7 * return 0 if no match 8 * >0 if a match 9 * <0 if we ran out of _relist space 10 */ 11 static int 12 rregexec1(Reprog *progp, /* program to run */ 13 Rune *bol, /* string to run machine on */ 14 Resub *mp, /* subexpression elements */ 15 int ms, /* number of elements at mp */ 16 Reljunk *j) 17 { 18 int flag=0; 19 Reinst *inst; 20 Relist *tlp; 21 Rune *s; 22 int i, checkstart; 23 Rune r, *rp, *ep; 24 Relist* tl; /* This list, next list */ 25 Relist* nl; 26 Relist* tle; /* ends of this and next list */ 27 Relist* nle; 28 int match; 29 30 match = 0; 31 checkstart = j->startchar; 32 if(mp) 33 for(i=0; i<ms; i++) { 34 mp[i].rsp = 0; 35 mp[i].rep = 0; 36 } 37 j->relist[0][0].inst = 0; 38 j->relist[1][0].inst = 0; 39 40 /* Execute machine once for each character, including terminal NUL */ 41 s = j->rstarts; 42 do{ 43 44 /* fast check for first char */ 45 if(checkstart) { 46 switch(j->starttype) { 47 case RUNE: 48 while(*s != j->startchar) { 49 if(*s == 0 || s == j->reol) 50 return match; 51 s++; 52 } 53 break; 54 case BOL: 55 if(s == bol) 56 break; 57 while(*s != '\n') { 58 if(*s == 0 || s == j->reol) 59 return match; 60 s++; 61 } 62 break; 63 } 64 } 65 66 r = *s; 67 68 /* switch run lists */ 69 tl = j->relist[flag]; 70 tle = j->reliste[flag]; 71 nl = j->relist[flag^=1]; 72 nle = j->reliste[flag]; 73 nl->inst = 0; 74 75 /* Add first instruction to current list */ 76 _rrenewemptythread(tl, progp->startinst, ms, s); 77 78 /* Execute machine until current list is empty */ 79 for(tlp=tl; tlp->inst; tlp++){ 80 for(inst=tlp->inst; ; inst = inst->next){ 81 switch(inst->type){ 82 case RUNE: /* regular character */ 83 if(inst->r == r) 84 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 85 return -1; 86 break; 87 case LBRA: 88 tlp->se.m[inst->subid].rsp = s; 89 continue; 90 case RBRA: 91 tlp->se.m[inst->subid].rep = s; 92 continue; 93 case ANY: 94 if(r != '\n') 95 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 96 return -1; 97 break; 98 case ANYNL: 99 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 100 return -1; 101 break; 102 case BOL: 103 if(s == bol || *(s-1) == '\n') 104 continue; 105 break; 106 case EOL: 107 if(s == j->reol || r == 0 || r == '\n') 108 continue; 109 break; 110 case CCLASS: 111 ep = inst->cp->end; 112 for(rp = inst->cp->spans; rp < ep; rp += 2) 113 if(r >= rp[0] && r <= rp[1]){ 114 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 115 return -1; 116 break; 117 } 118 break; 119 case NCCLASS: 120 ep = inst->cp->end; 121 for(rp = inst->cp->spans; rp < ep; rp += 2) 122 if(r >= rp[0] && r <= rp[1]) 123 break; 124 if(rp == ep) 125 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) 126 return -1; 127 break; 128 case OR: 129 /* evaluate right choice later */ 130 if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle) 131 return -1; 132 /* efficiency: advance and re-evaluate */ 133 continue; 134 case END: /* Match! */ 135 match = 1; 136 tlp->se.m[0].rep = s; 137 if(mp != 0) 138 _renewmatch(mp, ms, &tlp->se); 139 break; 140 } 141 break; 142 } 143 } 144 if(s == j->reol) 145 break; 146 checkstart = j->startchar && nl->inst==0; 147 s++; 148 }while(r); 149 return match; 150 } 151 152 static int 153 rregexec2(Reprog *progp, /* program to run */ 154 Rune *bol, /* string to run machine on */ 155 Resub *mp, /* subexpression elements */ 156 int ms, /* number of elements at mp */ 157 Reljunk *j 158 ) 159 { 160 Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE]; 161 162 /* mark space */ 163 j->relist[0] = relist0; 164 j->relist[1] = relist1; 165 j->reliste[0] = relist0 + nelem(relist0) - 2; 166 j->reliste[1] = relist1 + nelem(relist1) - 2; 167 168 return rregexec1(progp, bol, mp, ms, j); 169 } 170 171 extern int 172 rregexec(Reprog *progp, /* program to run */ 173 Rune *bol, /* string to run machine on */ 174 Resub *mp, /* subexpression elements */ 175 int ms) /* number of elements at mp */ 176 { 177 Reljunk j; 178 Relist relist0[LISTSIZE], relist1[LISTSIZE]; 179 int rv; 180 181 /* 182 * use user-specified starting/ending location if specified 183 */ 184 j.rstarts = bol; 185 j.reol = 0; 186 if(mp && ms>0){ 187 if(mp->sp) 188 j.rstarts = mp->rsp; 189 if(mp->ep) 190 j.reol = mp->rep; 191 } 192 j.starttype = 0; 193 j.startchar = 0; 194 if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) { 195 j.starttype = RUNE; 196 j.startchar = progp->startinst->r; 197 } 198 if(progp->startinst->type == BOL) 199 j.starttype = BOL; 200 201 /* mark space */ 202 j.relist[0] = relist0; 203 j.relist[1] = relist1; 204 j.reliste[0] = relist0 + nelem(relist0) - 2; 205 j.reliste[1] = relist1 + nelem(relist1) - 2; 206 207 rv = rregexec1(progp, bol, mp, ms, &j); 208 if(rv >= 0) 209 return rv; 210 rv = rregexec2(progp, bol, mp, ms, &j); 211 if(rv >= 0) 212 return rv; 213 return -1; 214 } 215