1 #include <u.h> 2 #include <libc.h> 3 #include "regexp.h" 4 #include "regcomp.h" 5 6 static Resublist sempty; /* empty set of matches */ 7 8 /* 9 * return 0 if no match 10 * >0 if a match 11 * <0 if we ran out of _relist space 12 */ 13 static int 14 rregexec1(Reprog *progp, /* program to run */ 15 Rune *bol, /* string to run machine on */ 16 Resub *mp, /* subexpression elements */ 17 int ms, /* number of elements at mp */ 18 Rune *starts, 19 Rune *eol, 20 Rune startchar) 21 { 22 int flag=0; 23 Reinst *inst; 24 Relist *tlp; 25 Rune *s; 26 int i, checkstart; 27 Rune r, *rp, *ep; 28 Relist* tl; /* This list, next list */ 29 Relist* nl; 30 Relist* tle; /* ends of this and next list */ 31 Relist* nle; 32 int match; 33 34 match = 0; 35 checkstart = startchar; 36 sempty.m[0].rsp = 0; 37 if(mp!=0) 38 for(i=0; i<ms; i++) 39 mp[i].rsp = mp[i].rep = 0; 40 _relist[0][0].inst = _relist[1][0].inst = 0; 41 42 /* Execute machine once for each character, including terminal NUL */ 43 s = starts; 44 do{ 45 r = *s; 46 47 /* fast check for first char */ 48 if(checkstart && r!=startchar){ 49 s++; 50 continue; 51 } 52 53 /* switch run lists */ 54 tl = _relist[flag]; 55 tle = _reliste[flag]; 56 nl = _relist[flag^=1]; 57 nle = _reliste[flag]; 58 nl->inst = 0; 59 60 /* Add first instruction to current list */ 61 sempty.m[0].rsp = s; 62 _renewthread(tl, progp->startinst, &sempty); 63 64 /* Execute machine until current list is empty */ 65 for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ 66 if(s == eol) 67 break; 68 69 for(inst=tlp->inst; ; inst = inst->next){ 70 switch(inst->type){ 71 case RUNE: /* regular character */ 72 if(inst->r == r) 73 if(_renewthread(nl, inst->next, &tlp->se)==nle) 74 return -1; 75 break; 76 case LBRA: 77 tlp->se.m[inst->subid].rsp = s; 78 continue; 79 case RBRA: 80 tlp->se.m[inst->subid].rep = s; 81 continue; 82 case ANY: 83 if(r != '\n') 84 if(_renewthread(nl, inst->next, &tlp->se)==nle) 85 return -1; 86 break; 87 case ANYNL: 88 if(_renewthread(nl, inst->next, &tlp->se)==nle) 89 return -1; 90 break; 91 case BOL: 92 if(s == bol || *(s-1) == '\n') 93 continue; 94 break; 95 case EOL: 96 if(r == 0 || r == '\n') 97 continue; 98 break; 99 case CCLASS: 100 ep = inst->cp->end; 101 for(rp = inst->cp->spans; rp < ep; rp += 2) 102 if(r >= rp[0] && r <= rp[1]){ 103 if(_renewthread(nl, inst->next, &tlp->se)==nle) 104 return -1; 105 break; 106 } 107 break; 108 case NCCLASS: 109 ep = inst->cp->end; 110 for(rp = inst->cp->spans; rp < ep; rp += 2) 111 if(r >= rp[0] && r <= rp[1]) 112 break; 113 if(rp == ep) 114 if(_renewthread(nl, inst->next, &tlp->se)==nle) 115 return -1; 116 break; 117 case OR: 118 /* evaluate right choice later */ 119 if(_renewthread(tlp, inst->right, &tlp->se) == tle) 120 return -1; 121 /* efficiency: advance and re-evaluate */ 122 continue; 123 case END: /* Match! */ 124 match = 1; 125 tlp->se.m[0].rep = s; 126 if(mp != 0) 127 _renewmatch(mp, ms, &tlp->se); 128 break; 129 } 130 break; 131 } 132 } 133 checkstart = startchar && nl->inst==0; 134 s++; 135 }while(r); 136 return match; 137 } 138 139 extern int 140 rregexec(Reprog *progp, /* program to run */ 141 Rune *bol, /* string to run machine on */ 142 Resub *mp, /* subexpression elements */ 143 int ms) /* number of elements at mp */ 144 { 145 Rune *starts; /* where to start match */ 146 Rune *eol; /* where to end match */ 147 Rune startchar; 148 int rv; 149 150 /* 151 * use user-specified starting/ending location if specified 152 */ 153 starts = bol; 154 eol = 0; 155 if(mp && ms>0){ 156 if(mp->rsp) 157 starts = mp->rsp; 158 if(mp->rep) 159 eol = mp->rep; 160 } 161 startchar = progp->startinst->type == RUNE ? progp->startinst->r : 0; 162 163 /* keep trying till we have enough list space to terminate */ 164 for(;;){ 165 if(_relist[0] == 0){ 166 _relist[0] = malloc(2*_relistsize*sizeof(Relist)); 167 _relist[1] = _relist[0] + _relistsize; 168 _reliste[0] = _relist[0] + _relistsize - 1; 169 _reliste[1] = _relist[1] + _relistsize - 1; 170 if(_relist[0] == 0) 171 regerror("_relist overflow"); 172 } 173 rv = rregexec1(progp, bol, mp, ms, starts, eol, startchar); 174 if(rv >= 0) 175 return rv; 176 free(_relist[0]); 177 _relist[0] = 0; 178 _relistsize += LISTINCREMENT; 179 } 180 return -1; 181 } 182