1 #include <stdlib.h> 2 #include <stdio.h> 3 #include "regexp.h" 4 #include "regcomp.h" 5 6 static Resublist sempty; /* empty set of matches */ 7 8 /* 9 * return 0 if no match 10 * >0 if a match 11 * <0 if we ran out of _relist space 12 */ 13 static int 14 rregexec1(Reprog *progp, /* program to run */ 15 wchar_t *bol, /* string to run machine on */ 16 Resub *mp, /* subexpression elements */ 17 int ms, /* number of elements at mp */ 18 wchar_t *starts, 19 wchar_t *eol, 20 wchar_t startchar) 21 { 22 int flag=0; 23 Reinst *inst; 24 Relist *tlp; 25 wchar_t *s; 26 int i, checkstart; 27 wchar_t r, *rp, *ep; 28 int n; 29 Relist* tl; /* This list, next list */ 30 Relist* nl; 31 Relist* tle; /* ends of this and next list */ 32 Relist* nle; 33 int match; 34 35 match = 0; 36 checkstart = startchar; 37 sempty.m[0].s.rsp = 0; 38 if(mp!=0) 39 for(i=0; i<ms; i++) 40 mp[i].s.rsp = mp[i].e.rep = 0; 41 _relist[0][0].inst = _relist[1][0].inst = 0; 42 43 /* Execute machine once for each character, including terminal NUL */ 44 s = starts; 45 do{ 46 r = *s; 47 48 /* fast check for first char */ 49 if(checkstart && r!=startchar){ 50 s++; 51 continue; 52 } 53 54 /* switch run lists */ 55 tl = _relist[flag]; 56 tle = _reliste[flag]; 57 nl = _relist[flag^=1]; 58 nle = _reliste[flag]; 59 nl->inst = 0; 60 61 /* Add first instruction to current list */ 62 sempty.m[0].s.rsp = s; 63 _renewthread(tl, progp->startinst, &sempty); 64 65 /* Execute machine until current list is empty */ 66 for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ 67 if(s == eol) 68 break; 69 70 for(inst=tlp->inst; ; inst = inst->l.next){ 71 switch(inst->type){ 72 case RUNE: /* regular character */ 73 if(inst->r.r == r) 74 if(_renewthread(nl, inst->l.next, &tlp->se)==nle) 75 return -1; 76 break; 77 case LBRA: 78 tlp->se.m[inst->r.subid].s.rsp = s; 79 continue; 80 case RBRA: 81 tlp->se.m[inst->r.subid].e.rep = s; 82 continue; 83 case ANY: 84 if(r != '\n') 85 if(_renewthread(nl, inst->l.next, &tlp->se)==nle) 86 return -1; 87 break; 88 case ANYNL: 89 if(_renewthread(nl, inst->l.next, &tlp->se)==nle) 90 return -1; 91 break; 92 case BOL: 93 if(s == bol || *(s-1) == '\n') 94 continue; 95 break; 96 case EOL: 97 if(r == 0 || r == '\n') 98 continue; 99 break; 100 case CCLASS: 101 ep = inst->r.cp->end; 102 for(rp = inst->r.cp->spans; rp < ep; rp += 2) 103 if(r >= rp[0] && r <= rp[1]){ 104 if(_renewthread(nl, inst->l.next, &tlp->se)==nle) 105 return -1; 106 break; 107 } 108 break; 109 case NCCLASS: 110 ep = inst->r.cp->end; 111 for(rp = inst->r.cp->spans; rp < ep; rp += 2) 112 if(r >= rp[0] && r <= rp[1]) 113 break; 114 if(rp == ep) 115 if(_renewthread(nl, inst->l.next, &tlp->se)==nle) 116 return -1; 117 break; 118 case OR: 119 /* evaluate right choice later */ 120 if(_renewthread(tlp, inst->r.right, &tlp->se) == tle) 121 return -1; 122 /* efficiency: advance and re-evaluate */ 123 continue; 124 case END: /* Match! */ 125 match = 1; 126 tlp->se.m[0].e.rep = s; 127 if(mp != 0) 128 _renewmatch(mp, ms, &tlp->se); 129 break; 130 } 131 break; 132 } 133 } 134 checkstart = startchar && nl->inst==0; 135 s++; 136 }while(r); 137 return match; 138 } 139 140 extern int 141 rregexec(Reprog *progp, /* program to run */ 142 wchar_t *bol, /* string to run machine on */ 143 Resub *mp, /* subexpression elements */ 144 int ms) /* number of elements at mp */ 145 { 146 wchar_t *starts; /* where to start match */ 147 wchar_t *eol; /* where to end match */ 148 wchar_t startchar; 149 int rv; 150 151 /* 152 * use user-specified starting/ending location if specified 153 */ 154 starts = bol; 155 eol = 0; 156 if(mp && ms>0){ 157 if(mp->s.rsp) 158 starts = mp->s.rsp; 159 if(mp->e.rep) 160 eol = mp->e.rep; 161 } 162 startchar = progp->startinst->type == RUNE ? progp->startinst->r.r : 0; 163 164 /* keep trying till we have enough list space to terminate */ 165 for(;;){ 166 if(_relist[0] == 0){ 167 _relist[0] = malloc(2*_relistsize*sizeof(Relist)); 168 _relist[1] = _relist[0] + _relistsize; 169 _reliste[0] = _relist[0] + _relistsize - 1; 170 _reliste[1] = _relist[1] + _relistsize - 1; 171 if(_relist[0] == 0) 172 regerror("_relist overflow"); 173 } 174 rv = rregexec1(progp, bol, mp, ms, starts, eol, startchar); 175 if(rv >= 0) 176 return rv; 177 free(_relist[0]); 178 _relist[0] = 0; 179 _relistsize += LISTINCREMENT; 180 } 181 } 182