1 #include <u.h>
2 #include <libc.h>
3 #include "regexp.h"
4 #include "regcomp.h"
5
6 /*
7 * return 0 if no match
8 * >0 if a match
9 * <0 if we ran out of _relist space
10 */
11 static int
rregexec1(Reprog * progp,Rune * bol,Resub * mp,int ms,Reljunk * j)12 rregexec1(Reprog *progp, /* program to run */
13 Rune *bol, /* string to run machine on */
14 Resub *mp, /* subexpression elements */
15 int ms, /* number of elements at mp */
16 Reljunk *j)
17 {
18 int flag=0;
19 Reinst *inst;
20 Relist *tlp;
21 Rune *s;
22 int i, checkstart;
23 Rune r, *rp, *ep;
24 Relist* tl; /* This list, next list */
25 Relist* nl;
26 Relist* tle; /* ends of this and next list */
27 Relist* nle;
28 int match;
29 Rune *p;
30
31 match = 0;
32 checkstart = j->startchar;
33 if(mp)
34 for(i=0; i<ms; i++) {
35 mp[i].rsp = 0;
36 mp[i].rep = 0;
37 }
38 j->relist[0][0].inst = 0;
39 j->relist[1][0].inst = 0;
40
41 /* Execute machine once for each character, including terminal NUL */
42 s = j->rstarts;
43 do{
44 /* fast check for first char */
45 if(checkstart) {
46 switch(j->starttype) {
47 case RUNE:
48 p = runestrchr(s, j->startchar);
49 if(p == 0 || s == j->reol)
50 return match;
51 s = p;
52 break;
53 case BOL:
54 if(s == bol)
55 break;
56 p = runestrchr(s, '\n');
57 if(p == 0 || s == j->reol)
58 return match;
59 s = p+1;
60 break;
61 }
62 }
63
64 r = *s;
65
66 /* switch run lists */
67 tl = j->relist[flag];
68 tle = j->reliste[flag];
69 nl = j->relist[flag^=1];
70 nle = j->reliste[flag];
71 nl->inst = 0;
72
73 /* Add first instruction to current list */
74 _rrenewemptythread(tl, progp->startinst, ms, s);
75
76 /* Execute machine until current list is empty */
77 for(tlp=tl; tlp->inst; tlp++){
78 for(inst=tlp->inst; ; inst = inst->next){
79 switch(inst->type){
80 case RUNE: /* regular character */
81 if(inst->r == r)
82 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
83 return -1;
84 break;
85 case LBRA:
86 tlp->se.m[inst->subid].rsp = s;
87 continue;
88 case RBRA:
89 tlp->se.m[inst->subid].rep = s;
90 continue;
91 case ANY:
92 if(r != '\n')
93 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
94 return -1;
95 break;
96 case ANYNL:
97 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
98 return -1;
99 break;
100 case BOL:
101 if(s == bol || *(s-1) == '\n')
102 continue;
103 break;
104 case EOL:
105 if(s == j->reol || r == 0 || r == '\n')
106 continue;
107 break;
108 case CCLASS:
109 ep = inst->cp->end;
110 for(rp = inst->cp->spans; rp < ep; rp += 2)
111 if(r >= rp[0] && r <= rp[1]){
112 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
113 return -1;
114 break;
115 }
116 break;
117 case NCCLASS:
118 ep = inst->cp->end;
119 for(rp = inst->cp->spans; rp < ep; rp += 2)
120 if(r >= rp[0] && r <= rp[1])
121 break;
122 if(rp == ep)
123 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
124 return -1;
125 break;
126 case OR:
127 /* evaluate right choice later */
128 if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
129 return -1;
130 /* efficiency: advance and re-evaluate */
131 continue;
132 case END: /* Match! */
133 match = 1;
134 tlp->se.m[0].rep = s;
135 if(mp != 0)
136 _renewmatch(mp, ms, &tlp->se);
137 break;
138 }
139 break;
140 }
141 }
142 if(s == j->reol)
143 break;
144 checkstart = j->startchar && nl->inst==0;
145 s++;
146 }while(r);
147 return match;
148 }
149
150 static int
rregexec2(Reprog * progp,Rune * bol,Resub * mp,int ms,Reljunk * j)151 rregexec2(Reprog *progp, /* program to run */
152 Rune *bol, /* string to run machine on */
153 Resub *mp, /* subexpression elements */
154 int ms, /* number of elements at mp */
155 Reljunk *j
156 )
157 {
158 Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
159
160 /* mark space */
161 j->relist[0] = relist0;
162 j->relist[1] = relist1;
163 j->reliste[0] = relist0 + nelem(relist0) - 2;
164 j->reliste[1] = relist1 + nelem(relist1) - 2;
165
166 return rregexec1(progp, bol, mp, ms, j);
167 }
168
169 extern int
rregexec(Reprog * progp,Rune * bol,Resub * mp,int ms)170 rregexec(Reprog *progp, /* program to run */
171 Rune *bol, /* string to run machine on */
172 Resub *mp, /* subexpression elements */
173 int ms) /* number of elements at mp */
174 {
175 Reljunk j;
176 Relist relist0[LISTSIZE], relist1[LISTSIZE];
177 int rv;
178
179 /*
180 * use user-specified starting/ending location if specified
181 */
182 j.rstarts = bol;
183 j.reol = 0;
184 if(mp && ms>0){
185 if(mp->sp)
186 j.rstarts = mp->rsp;
187 if(mp->ep)
188 j.reol = mp->rep;
189 }
190 j.starttype = 0;
191 j.startchar = 0;
192 if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
193 j.starttype = RUNE;
194 j.startchar = progp->startinst->r;
195 }
196 if(progp->startinst->type == BOL)
197 j.starttype = BOL;
198
199 /* mark space */
200 j.relist[0] = relist0;
201 j.relist[1] = relist1;
202 j.reliste[0] = relist0 + nelem(relist0) - 2;
203 j.reliste[1] = relist1 + nelem(relist1) - 2;
204
205 rv = rregexec1(progp, bol, mp, ms, &j);
206 if(rv >= 0)
207 return rv;
208 rv = rregexec2(progp, bol, mp, ms, &j);
209 if(rv >= 0)
210 return rv;
211 return -1;
212 }
213