1 #include <stdlib.h> 2 #include <stdio.h> 3 #include "regexp.h" 4 #include "regcomp.h" 5 6 static Resublist sempty; /* empty set of matches */ 7 8 /* 9 * return 0 if no match 10 * >0 if a match 11 * <0 if we ran out of _relist space 12 */ 13 static int 14 regexec1(Reprog *progp, /* program to run */ 15 char *bol, /* string to run machine on */ 16 Resub *mp, /* subexpression elements */ 17 int ms, /* number of elements at mp */ 18 char *starts, 19 char *eol, 20 wchar_t startchar) 21 { 22 int flag=0; 23 Reinst *inst; 24 Relist *tlp; 25 char *s; 26 int i, checkstart; 27 wchar_t r, *rp, *ep; 28 int n; 29 Relist* tl; /* This list, next list */ 30 Relist* nl; 31 Relist* tle; /* ends of this and next list */ 32 Relist* nle; 33 int match; 34 35 match = 0; 36 checkstart = startchar; 37 sempty.m[0].s.sp = 0; 38 if(mp!=0) 39 for(i=0; i<ms; i++) 40 mp[i].s.sp = mp[i].e.ep = 0; 41 _relist[0][0].inst = _relist[1][0].inst = 0; 42 43 /* Execute machine once for each character, including terminal NUL */ 44 s = starts; 45 do{ 46 /* fast check for first char */ 47 r = *(unsigned char*)s; 48 if(checkstart && r != startchar){ 49 s++; 50 continue; 51 } 52 53 if(r < Runeself) 54 n = 1; 55 else { 56 n = mbtowc(&r, s, MB_CUR_MAX); 57 if (n <= 0) 58 n = 1; 59 } 60 61 /* switch run lists */ 62 tl = _relist[flag]; 63 tle = _reliste[flag]; 64 nl = _relist[flag^=1]; 65 nle = _reliste[flag]; 66 nl->inst = 0; 67 68 /* Add first instruction to current list */ 69 if(match == 0){ 70 sempty.m[0].s.sp = s; 71 _renewthread(tl, progp->startinst, &sempty); 72 } 73 74 /* Execute machine until current list is empty */ 75 for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ 76 if(s == eol) 77 break; 78 79 for(inst = tlp->inst; ; inst = inst->l.next){ 80 switch(inst->type){ 81 case RUNE: /* regular character */ 82 if(inst->r.r == r) 83 if(_renewthread(nl, inst->l.next, &tlp->se)==nle) 84 return -1; 85 break; 86 case LBRA: 87 tlp->se.m[inst->r.subid].s.sp = s; 88 continue; 89 case RBRA: 90 tlp->se.m[inst->r.subid].e.ep = s; 91 continue; 92 case ANY: 93 if(r != '\n') 94 if(_renewthread(nl, inst->l.next, &tlp->se)==nle) 95 return -1; 96 break; 97 case ANYNL: 98 if(_renewthread(nl, inst->l.next, &tlp->se)==nle) 99 return -1; 100 break; 101 case BOL: 102 if(s == bol || *(s-1) == '\n') 103 continue; 104 break; 105 case EOL: 106 if(r == 0 || r == '\n') 107 continue; 108 break; 109 case CCLASS: 110 ep = inst->r.cp->end; 111 for(rp = inst->r.cp->spans; rp < ep; rp += 2) 112 if(r >= rp[0] && r <= rp[1]){ 113 if(_renewthread(nl, inst->l.next, &tlp->se)==nle) 114 return -1; 115 break; 116 } 117 break; 118 case NCCLASS: 119 ep = inst->r.cp->end; 120 for(rp = inst->r.cp->spans; rp < ep; rp += 2) 121 if(r >= rp[0] && r <= rp[1]) 122 break; 123 if(rp == ep) 124 if(_renewthread(nl, inst->l.next, &tlp->se)==nle) 125 return -1; 126 break; 127 case OR: 128 /* evaluate right choice later */ 129 if(_renewthread(tlp, inst->r.right, &tlp->se) == tle) 130 return -1; 131 /* efficiency: advance and re-evaluate */ 132 continue; 133 case END: /* Match! */ 134 match = 1; 135 tlp->se.m[0].e.ep = s; 136 if(mp != 0) 137 _renewmatch(mp, ms, &tlp->se); 138 break; 139 } 140 break; 141 } 142 } 143 checkstart = startchar && nl->inst==0; 144 s += n; 145 }while(r); 146 return match; 147 } 148 149 extern int 150 regexec(Reprog *progp, /* program to run */ 151 char *bol, /* string to run machine on */ 152 Resub *mp, /* subexpression elements */ 153 int ms) /* number of elements at mp */ 154 { 155 char *starts; /* where to start match */ 156 char *eol; /* where to end match */ 157 wchar_t startchar; 158 int rv; 159 160 /* 161 * use user-specified starting/ending location if specified 162 */ 163 starts = bol; 164 eol = 0; 165 if(mp && ms>0){ 166 if(mp->s.sp) 167 starts = mp->s.sp; 168 if(mp->e.ep) 169 eol = mp->e.ep; 170 } 171 startchar = (progp->startinst->type == RUNE && progp->startinst->r.r < Runeself) 172 ? progp->startinst->r.r : 0; 173 174 /* keep trying till we have enough list space to terminate */ 175 for(;;){ 176 if(_relist[0] == 0){ 177 _relist[0] = malloc(2*_relistsize*sizeof(Relist)); 178 _relist[1] = _relist[0] + _relistsize; 179 _reliste[0] = _relist[0] + _relistsize - 1; 180 _reliste[1] = _relist[1] + _relistsize - 1; 181 if(_relist[0] == 0) 182 regerror("_relist overflow"); 183 } 184 rv = regexec1(progp, bol, mp, ms, starts, eol, startchar); 185 if(rv >= 0) 186 return rv; 187 free(_relist[0]); 188 _relist[0] = 0; 189 _relistsize += LISTINCREMENT; 190 } 191 } 192