1 #include <u.h> 2 #include <libc.h> 3 #include "regexp.h" 4 #include "regcomp.h" 5 6 static Resublist sempty; /* empty set of matches */ 7 8 /* 9 * return 0 if no match 10 * >0 if a match 11 * <0 if we ran out of _relist space 12 */ 13 static int 14 regexec1(Reprog *progp, /* program to run */ 15 char *bol, /* string to run machine on */ 16 Resub *mp, /* subexpression elements */ 17 int ms, /* number of elements at mp */ 18 char *starts, 19 char *eol, 20 Rune startchar) 21 { 22 int flag=0; 23 Reinst *inst; 24 Relist *tlp; 25 char *s; 26 int i, checkstart; 27 Rune r, *rp, *ep; 28 int n; 29 Relist* tl; /* This list, next list */ 30 Relist* nl; 31 Relist* tle; /* ends of this and next list */ 32 Relist* nle; 33 int match; 34 char *p; 35 36 match = 0; 37 checkstart = startchar; 38 sempty.m[0].sp = 0; 39 if(mp!=0) 40 for(i=0; i<ms; i++) 41 mp[i].sp = mp[i].ep = 0; 42 _relist[0][0].inst = _relist[1][0].inst = 0; 43 44 /* Execute machine once for each character, including terminal NUL */ 45 s = starts; 46 do{ 47 /* fast check for first char */ 48 if(checkstart){ 49 p = utfrune(s, startchar); 50 if(p == 0) 51 return match; 52 s = p; 53 } 54 r = *(uchar*)s; 55 if(r < Runeself) 56 n = 1; 57 else 58 n = chartorune(&r, s); 59 60 /* switch run lists */ 61 tl = _relist[flag]; 62 tle = _reliste[flag]; 63 nl = _relist[flag^=1]; 64 nle = _reliste[flag]; 65 nl->inst = 0; 66 67 /* Add first instruction to current list */ 68 if(match == 0){ 69 sempty.m[0].sp = s; 70 _renewthread(tl, progp->startinst, &sempty); 71 } 72 73 /* Execute machine until current list is empty */ 74 for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ 75 if(s == eol) 76 break; 77 78 for(inst = tlp->inst; ; inst = inst->next){ 79 switch(inst->type){ 80 case RUNE: /* regular character */ 81 if(inst->r == r) 82 if(_renewthread(nl, inst->next, &tlp->se)==nle) 83 return -1; 84 break; 85 case LBRA: 86 tlp->se.m[inst->subid].sp = s; 87 continue; 88 case RBRA: 89 tlp->se.m[inst->subid].ep = s; 90 continue; 91 case ANY: 92 if(r != '\n') 93 if(_renewthread(nl, inst->next, &tlp->se)==nle) 94 return -1; 95 break; 96 case ANYNL: 97 if(_renewthread(nl, inst->next, &tlp->se)==nle) 98 return -1; 99 break; 100 case BOL: 101 if(s == bol || *(s-1) == '\n') 102 continue; 103 break; 104 case EOL: 105 if(r == 0 || r == '\n') 106 continue; 107 break; 108 case CCLASS: 109 ep = inst->cp->end; 110 for(rp = inst->cp->spans; rp < ep; rp += 2) 111 if(r >= rp[0] && r <= rp[1]){ 112 if(_renewthread(nl, inst->next, &tlp->se)==nle) 113 return -1; 114 break; 115 } 116 break; 117 case NCCLASS: 118 ep = inst->cp->end; 119 for(rp = inst->cp->spans; rp < ep; rp += 2) 120 if(r >= rp[0] && r <= rp[1]) 121 break; 122 if(rp == ep) 123 if(_renewthread(nl, inst->next, &tlp->se)==nle) 124 return -1; 125 break; 126 case OR: 127 /* evaluate right choice later */ 128 if(_renewthread(tlp, inst->right, &tlp->se) == tle) 129 return -1; 130 /* efficiency: advance and re-evaluate */ 131 continue; 132 case END: /* Match! */ 133 match = 1; 134 tlp->se.m[0].ep = s; 135 if(mp != 0) 136 _renewmatch(mp, ms, &tlp->se); 137 break; 138 } 139 break; 140 } 141 } 142 checkstart = startchar && nl->inst==0; 143 s += n; 144 }while(r); 145 return match; 146 } 147 148 extern int 149 regexec(Reprog *progp, /* program to run */ 150 char *bol, /* string to run machine on */ 151 Resub *mp, /* subexpression elements */ 152 int ms) /* number of elements at mp */ 153 { 154 char *starts; /* where to start match */ 155 char *eol; /* where to end match */ 156 Rune startchar; 157 int rv; 158 159 /* 160 * use user-specified starting/ending location if specified 161 */ 162 starts = bol; 163 eol = 0; 164 if(mp && ms>0){ 165 if(mp->sp) 166 starts = mp->sp; 167 if(mp->ep) 168 eol = mp->ep; 169 } 170 startchar = (progp->startinst->type == RUNE && progp->startinst->r < Runeself) 171 ? progp->startinst->r : 0; 172 173 /* keep trying till we have enough list space to terminate */ 174 for(;;){ 175 if(_relist[0] == 0){ 176 _relist[0] = malloc(2*_relistsize*sizeof(Relist)); 177 _relist[1] = _relist[0] + _relistsize; 178 _reliste[0] = _relist[0] + _relistsize - 1; 179 _reliste[1] = _relist[1] + _relistsize - 1; 180 if(_relist[0] == 0) 181 regerror("_relist overflow"); 182 } 183 rv = regexec1(progp, bol, mp, ms, starts, eol, startchar); 184 if(rv >= 0) 185 return rv; 186 free(_relist[0]); 187 _relist[0] = 0; 188 _relistsize += LISTINCREMENT; 189 } 190 return -1; 191 } 192