1 #include <stdlib.h>
2 #include <stdio.h>
3 #include "regexp.h"
4 #include "regcomp.h"
5
6 static Resublist sempty; /* empty set of matches */
7
8 /*
9 * return 0 if no match
10 * >0 if a match
11 * <0 if we ran out of _relist space
12 */
13 static int
regexec1(Reprog * progp,char * bol,Resub * mp,int ms,char * starts,char * eol,wchar_t startchar)14 regexec1(Reprog *progp, /* program to run */
15 char *bol, /* string to run machine on */
16 Resub *mp, /* subexpression elements */
17 int ms, /* number of elements at mp */
18 char *starts,
19 char *eol,
20 wchar_t startchar)
21 {
22 int flag=0;
23 Reinst *inst;
24 Relist *tlp;
25 char *s;
26 int i, checkstart;
27 wchar_t r, *rp, *ep;
28 int n;
29 Relist* tl; /* This list, next list */
30 Relist* nl;
31 Relist* tle; /* ends of this and next list */
32 Relist* nle;
33 int match;
34
35 match = 0;
36 checkstart = startchar;
37 sempty.m[0].s.sp = 0;
38 if(mp!=0)
39 for(i=0; i<ms; i++)
40 mp[i].s.sp = mp[i].e.ep = 0;
41 _relist[0][0].inst = _relist[1][0].inst = 0;
42
43 /* Execute machine once for each character, including terminal NUL */
44 s = starts;
45 do{
46 /* fast check for first char */
47 r = *(unsigned char*)s;
48 if(checkstart && r != startchar){
49 s++;
50 continue;
51 }
52
53 if(r < Runeself)
54 n = 1;
55 else {
56 n = mbtowc(&r, s, MB_CUR_MAX);
57 if (n <= 0)
58 n = 1;
59 }
60
61 /* switch run lists */
62 tl = _relist[flag];
63 tle = _reliste[flag];
64 nl = _relist[flag^=1];
65 nle = _reliste[flag];
66 nl->inst = 0;
67
68 /* Add first instruction to current list */
69 if(match == 0){
70 sempty.m[0].s.sp = s;
71 _renewthread(tl, progp->startinst, &sempty);
72 }
73
74 /* Execute machine until current list is empty */
75 for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
76 if(s == eol)
77 break;
78
79 for(inst = tlp->inst; ; inst = inst->l.next){
80 switch(inst->type){
81 case RUNE: /* regular character */
82 if(inst->r.r == r)
83 if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
84 return -1;
85 break;
86 case LBRA:
87 tlp->se.m[inst->r.subid].s.sp = s;
88 continue;
89 case RBRA:
90 tlp->se.m[inst->r.subid].e.ep = s;
91 continue;
92 case ANY:
93 if(r != '\n')
94 if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
95 return -1;
96 break;
97 case ANYNL:
98 if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
99 return -1;
100 break;
101 case BOL:
102 if(s == bol || *(s-1) == '\n')
103 continue;
104 break;
105 case EOL:
106 if(r == 0 || r == '\n')
107 continue;
108 break;
109 case CCLASS:
110 ep = inst->r.cp->end;
111 for(rp = inst->r.cp->spans; rp < ep; rp += 2)
112 if(r >= rp[0] && r <= rp[1]){
113 if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
114 return -1;
115 break;
116 }
117 break;
118 case NCCLASS:
119 ep = inst->r.cp->end;
120 for(rp = inst->r.cp->spans; rp < ep; rp += 2)
121 if(r >= rp[0] && r <= rp[1])
122 break;
123 if(rp == ep)
124 if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
125 return -1;
126 break;
127 case OR:
128 /* evaluate right choice later */
129 if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
130 return -1;
131 /* efficiency: advance and re-evaluate */
132 continue;
133 case END: /* Match! */
134 match = 1;
135 tlp->se.m[0].e.ep = s;
136 if(mp != 0)
137 _renewmatch(mp, ms, &tlp->se);
138 break;
139 }
140 break;
141 }
142 }
143 checkstart = startchar && nl->inst==0;
144 s += n;
145 }while(r);
146 return match;
147 }
148
149 extern int
regexec(Reprog * progp,char * bol,Resub * mp,int ms)150 regexec(Reprog *progp, /* program to run */
151 char *bol, /* string to run machine on */
152 Resub *mp, /* subexpression elements */
153 int ms) /* number of elements at mp */
154 {
155 char *starts; /* where to start match */
156 char *eol; /* where to end match */
157 wchar_t startchar;
158 int rv;
159
160 /*
161 * use user-specified starting/ending location if specified
162 */
163 starts = bol;
164 eol = 0;
165 if(mp && ms>0){
166 if(mp->s.sp)
167 starts = mp->s.sp;
168 if(mp->e.ep)
169 eol = mp->e.ep;
170 }
171 startchar = (progp->startinst->type == RUNE && progp->startinst->r.r < Runeself)
172 ? progp->startinst->r.r : 0;
173
174 /* keep trying till we have enough list space to terminate */
175 for(;;){
176 if(_relist[0] == 0){
177 _relist[0] = malloc(2*_relistsize*sizeof(Relist));
178 _relist[1] = _relist[0] + _relistsize;
179 _reliste[0] = _relist[0] + _relistsize - 1;
180 _reliste[1] = _relist[1] + _relistsize - 1;
181 if(_relist[0] == 0)
182 regerror("_relist overflow");
183 }
184 rv = regexec1(progp, bol, mp, ms, starts, eol, startchar);
185 if(rv >= 0)
186 return rv;
187 free(_relist[0]);
188 _relist[0] = 0;
189 _relistsize += LISTINCREMENT;
190 }
191 }
192