1 #include <u.h> 2 #include <libc.h> 3 #include "regexp.h" 4 #include "regcomp.h" 5 6 7 /* 8 * return 0 if no match 9 * >0 if a match 10 * <0 if we ran out of _relist space 11 */ 12 static int 13 regexec1(Reprog *progp, /* program to run */ 14 char *bol, /* string to run machine on */ 15 Resub *mp, /* subexpression elements */ 16 int ms, /* number of elements at mp */ 17 Reljunk *j 18 ) 19 { 20 int flag=0; 21 Reinst *inst; 22 Relist *tlp; 23 char *s; 24 int i, checkstart; 25 Rune r, *rp, *ep; 26 int n; 27 Relist* tl; /* This list, next list */ 28 Relist* nl; 29 Relist* tle; /* ends of this and next list */ 30 Relist* nle; 31 int match; 32 char *p; 33 34 match = 0; 35 checkstart = j->starttype; 36 if(mp) 37 for(i=0; i<ms; i++) { 38 mp[i].sp = 0; 39 mp[i].ep = 0; 40 } 41 j->relist[0][0].inst = 0; 42 j->relist[1][0].inst = 0; 43 44 /* Execute machine once for each character, including terminal NUL */ 45 s = j->starts; 46 do{ 47 /* fast check for first char */ 48 if(checkstart) { 49 switch(j->starttype) { 50 case RUNE: 51 p = utfrune(s, j->startchar); 52 if(p == 0) 53 return match; 54 s = p; 55 break; 56 case BOL: 57 if(s == bol) 58 break; 59 p = utfrune(s, '\n'); 60 if(p == 0) 61 return match; 62 s = p; 63 break; 64 } 65 } 66 r = *(uchar*)s; 67 if(r < Runeself) 68 n = 1; 69 else 70 n = chartorune(&r, s); 71 72 /* switch run lists */ 73 tl = j->relist[flag]; 74 tle = j->reliste[flag]; 75 nl = j->relist[flag^=1]; 76 nle = j->reliste[flag]; 77 nl->inst = 0; 78 79 /* Add first instruction to current list */ 80 if(match == 0) 81 _renewemptythread(tl, progp->startinst, s); 82 83 /* Execute machine until current list is empty */ 84 for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ 85 if(s == j->eol) 86 break; 87 88 for(inst = tlp->inst; ; inst = inst->next){ 89 switch(inst->type){ 90 case RUNE: /* regular character */ 91 if(inst->r == r){ 92 if(_renewthread(nl, inst->next, &tlp->se)==nle) 93 return -1; 94 } 95 break; 96 case LBRA: 97 tlp->se.m[inst->subid].sp = s; 98 continue; 99 case RBRA: 100 tlp->se.m[inst->subid].ep = s; 101 continue; 102 case ANY: 103 if(r != '\n') 104 if(_renewthread(nl, inst->next, &tlp->se)==nle) 105 return -1; 106 break; 107 case ANYNL: 108 if(_renewthread(nl, inst->next, &tlp->se)==nle) 109 return -1; 110 break; 111 case BOL: 112 if(s == bol || *(s-1) == '\n') 113 continue; 114 break; 115 case EOL: 116 if(r == 0 || r == '\n') 117 continue; 118 break; 119 case CCLASS: 120 ep = inst->cp->end; 121 for(rp = inst->cp->spans; rp < ep; rp += 2) 122 if(r >= rp[0] && r <= rp[1]){ 123 if(_renewthread(nl, inst->next, &tlp->se)==nle) 124 return -1; 125 break; 126 } 127 break; 128 case NCCLASS: 129 ep = inst->cp->end; 130 for(rp = inst->cp->spans; rp < ep; rp += 2) 131 if(r >= rp[0] && r <= rp[1]) 132 break; 133 if(rp == ep) 134 if(_renewthread(nl, inst->next, &tlp->se)==nle) 135 return -1; 136 break; 137 case OR: 138 /* evaluate right choice later */ 139 if(_renewthread(tlp, inst->right, &tlp->se) == tle) 140 return -1; 141 /* efficiency: advance and re-evaluate */ 142 continue; 143 case END: /* Match! */ 144 match = 1; 145 tlp->se.m[0].ep = s; 146 if(mp != 0) 147 _renewmatch(mp, ms, &tlp->se); 148 break; 149 } 150 break; 151 } 152 } 153 checkstart = j->starttype && nl->inst==0; 154 s += n; 155 }while(r); 156 return match; 157 } 158 159 static int 160 regexec2(Reprog *progp, /* program to run */ 161 char *bol, /* string to run machine on */ 162 Resub *mp, /* subexpression elements */ 163 int ms, /* number of elements at mp */ 164 Reljunk *j 165 ) 166 { 167 Relist relist0[BIGLISTSIZE], relist1[BIGLISTSIZE]; 168 169 /* mark space */ 170 j->relist[0] = relist0; 171 j->relist[1] = relist1; 172 j->reliste[0] = relist0 + nelem(relist0) - 2; 173 j->reliste[1] = relist1 + nelem(relist1) - 2; 174 175 return regexec1(progp, bol, mp, ms, j); 176 } 177 178 extern int 179 regexec(Reprog *progp, /* program to run */ 180 char *bol, /* string to run machine on */ 181 Resub *mp, /* subexpression elements */ 182 int ms) /* number of elements at mp */ 183 { 184 Reljunk j; 185 Relist relist0[LISTSIZE], relist1[LISTSIZE]; 186 int rv; 187 188 /* 189 * use user-specified starting/ending location if specified 190 */ 191 j.starts = bol; 192 j.eol = 0; 193 if(mp && ms>0){ 194 if(mp->sp) 195 j.starts = mp->sp; 196 if(mp->ep) 197 j.eol = mp->ep; 198 } 199 j.starttype = 0; 200 j.startchar = 0; 201 if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) { 202 j.starttype = RUNE; 203 j.startchar = progp->startinst->r; 204 } 205 if(progp->startinst->type == BOL) 206 j.starttype = BOL; 207 208 /* mark space */ 209 j.relist[0] = relist0; 210 j.relist[1] = relist1; 211 j.reliste[0] = relist0 + nelem(relist0) - 2; 212 j.reliste[1] = relist1 + nelem(relist1) - 2; 213 214 rv = regexec1(progp, bol, mp, ms, &j); 215 if(rv >= 0) 216 return rv; 217 rv = regexec2(progp, bol, mp, ms, &j); 218 if(rv >= 0) 219 return rv; 220 return -1; 221 } 222