1 #include <lib9.h> 2 #include "regexp.h" 3 #include "regcomp.h" 4 5 /* 6 * return 0 if no match 7 * >0 if a match 8 * <0 if we ran out of _relist space 9 */ 10 static int 11 rregexec1(Reprog *progp, /* program to run */ 12 Rune *bol, /* string to run machine on */ 13 Resub *mp, /* subexpression elements */ 14 int ms, /* number of elements at mp */ 15 Reljunk *j) 16 { 17 int flag=0; 18 Reinst *inst; 19 Relist *tlp; 20 Rune *s; 21 int i, checkstart; 22 Rune r, *rp, *ep; 23 Relist* tl; /* This list, next list */ 24 Relist* nl; 25 Relist* tle; /* ends of this and next list */ 26 Relist* nle; 27 int match; 28 29 match = 0; 30 checkstart = j->startchar; 31 if(mp) 32 for(i=0; i<ms; i++) { 33 mp[i].s.rsp = 0; 34 mp[i].e.rep = 0; 35 } 36 j->relist[0][0].inst = 0; 37 j->relist[1][0].inst = 0; 38 39 /* Execute machine once for each character, including terminal NUL */ 40 s = j->rstarts; 41 do{ 42 43 /* fast check for first char */ 44 if(checkstart) { 45 switch(j->starttype) { 46 case RUNE: 47 while(*s != j->startchar) { 48 if(*s == 0 || s == j->reol) 49 return match; 50 s++; 51 } 52 break; 53 case BOL: 54 if(s == bol) 55 break; 56 while(*s != '\n') { 57 if(*s == 0 || s == j->reol) 58 return match; 59 s++; 60 } 61 break; 62 } 63 } 64 65 r = *s; 66 67 /* switch run lists */ 68 tl = j->relist[flag]; 69 tle = j->reliste[flag]; 70 nl = j->relist[flag^=1]; 71 nle = j->reliste[flag]; 72 nl->inst = 0; 73 74 /* Add first instruction to current list */ 75 _rrenewemptythread(tl, progp->startinst, s); 76 77 /* Execute machine until current list is empty */ 78 for(tlp=tl; tlp->inst; tlp++){ 79 for(inst=tlp->inst; ; inst = inst->u2.next){ 80 switch(inst->type){ 81 case RUNE: /* regular character */ 82 if(inst->u1.r == r) 83 if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) 84 return -1; 85 break; 86 case LBRA: 87 tlp->se.m[inst->u1.subid].s.rsp = s; 88 continue; 89 case RBRA: 90 tlp->se.m[inst->u1.subid].e.rep = s; 91 continue; 92 case ANY: 93 if(r != '\n') 94 if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) 95 return -1; 96 break; 97 case ANYNL: 98 if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) 99 return -1; 100 break; 101 case BOL: 102 if(s == bol || *(s-1) == '\n') 103 continue; 104 break; 105 case EOL: 106 if(s == j->reol || r == 0 || r == '\n') 107 continue; 108 break; 109 case CCLASS: 110 ep = inst->u1.cp->end; 111 for(rp = inst->u1.cp->spans; rp < ep; rp += 2) 112 if(r >= rp[0] && r <= rp[1]){ 113 if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) 114 return -1; 115 break; 116 } 117 break; 118 case NCCLASS: 119 ep = inst->u1.cp->end; 120 for(rp = inst->u1.cp->spans; rp < ep; rp += 2) 121 if(r >= rp[0] && r <= rp[1]) 122 break; 123 if(rp == ep) 124 if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) 125 return -1; 126 break; 127 case OR: 128 /* evaluate right choice later */ 129 if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle) 130 return -1; 131 /* efficiency: advance and re-evaluate */ 132 continue; 133 case END: /* Match! */ 134 match = 1; 135 tlp->se.m[0].e.rep = s; 136 if(mp != 0) 137 _renewmatch(mp, ms, &tlp->se); 138 break; 139 } 140 break; 141 } 142 } 143 if(s == j->reol) 144 break; 145 checkstart = j->startchar && nl->inst==0; 146 s++; 147 }while(r); 148 return match; 149 } 150 151 static int 152 rregexec2(Reprog *progp, /* program to run */ 153 Rune *bol, /* string to run machine on */ 154 Resub *mp, /* subexpression elements */ 155 int ms, /* number of elements at mp */ 156 Reljunk *j 157 ) 158 { 159 Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE]; 160 161 /* mark space */ 162 j->relist[0] = relist0; 163 j->relist[1] = relist1; 164 j->reliste[0] = relist0 + nelem(relist0) - 2; 165 j->reliste[1] = relist1 + nelem(relist1) - 2; 166 167 return rregexec1(progp, bol, mp, ms, j); 168 } 169 170 extern int 171 rregexec(Reprog *progp, /* program to run */ 172 Rune *bol, /* string to run machine on */ 173 Resub *mp, /* subexpression elements */ 174 int ms) /* number of elements at mp */ 175 { 176 Reljunk j; 177 Relist relist0[LISTSIZE], relist1[LISTSIZE]; 178 int rv; 179 180 /* 181 * use user-specified starting/ending location if specified 182 */ 183 j.rstarts = bol; 184 j.reol = 0; 185 if(mp && ms>0){ 186 if(mp->s.sp) 187 j.rstarts = mp->s.rsp; 188 if(mp->e.ep) 189 j.reol = mp->e.rep; 190 } 191 j.starttype = 0; 192 j.startchar = 0; 193 if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) { 194 j.starttype = RUNE; 195 j.startchar = progp->startinst->u1.r; 196 } 197 if(progp->startinst->type == BOL) 198 j.starttype = BOL; 199 200 /* mark space */ 201 j.relist[0] = relist0; 202 j.relist[1] = relist1; 203 j.reliste[0] = relist0 + nelem(relist0) - 2; 204 j.reliste[1] = relist1 + nelem(relist1) - 2; 205 206 rv = rregexec1(progp, bol, mp, ms, &j); 207 if(rv >= 0) 208 return rv; 209 rv = rregexec2(progp, bol, mp, ms, &j); 210 if(rv >= 0) 211 return rv; 212 return -1; 213 } 214