1 #include "grep.h"
2
3 /*
4 * incremental compiler.
5 * add the branch c to the
6 * state s.
7 */
8 void
increment(State * s,int c)9 increment(State *s, int c)
10 {
11 int i;
12 State *t, **tt;
13 Re *re1, *re2;
14
15 nfollow = 0;
16 gen++;
17 matched = 0;
18 for(i=0; i<s->count; i++)
19 fol1(s->re[i], c);
20 qsort(follow, nfollow, sizeof(*follow), fcmp);
21 for(tt=&state0; t = *tt;) {
22 if(t->count > nfollow) {
23 tt = &t->linkleft;
24 goto cont;
25 }
26 if(t->count < nfollow) {
27 tt = &t->linkright;
28 goto cont;
29 }
30 for(i=0; i<nfollow; i++) {
31 re1 = t->re[i];
32 re2 = follow[i];
33 if(re1 > re2) {
34 tt = &t->linkleft;
35 goto cont;
36 }
37 if(re1 < re2) {
38 tt = &t->linkright;
39 goto cont;
40 }
41 }
42 if(!!matched && !t->match) {
43 tt = &t->linkleft;
44 goto cont;
45 }
46 if(!matched && !!t->match) {
47 tt = &t->linkright;
48 goto cont;
49 }
50 s->next[c] = t;
51 return;
52 cont:;
53 }
54
55 t = sal(nfollow);
56 *tt = t;
57 for(i=0; i<nfollow; i++) {
58 re1 = follow[i];
59 t->re[i] = re1;
60 }
61 s->next[c] = t;
62 t->match = matched;
63 }
64
65 int
fcmp(void * va,void * vb)66 fcmp(void *va, void *vb)
67 {
68 Re **aa, **bb;
69 Re *a, *b;
70
71 aa = va;
72 bb = vb;
73 a = *aa;
74 b = *bb;
75 if(a > b)
76 return 1;
77 if(a < b)
78 return -1;
79 return 0;
80 }
81
82 void
fol1(Re * r,int c)83 fol1(Re *r, int c)
84 {
85 Re *r1;
86
87 loop:
88 if(r->gen == gen)
89 return;
90 if(nfollow >= maxfollow)
91 error("nfollow");
92 r->gen = gen;
93 switch(r->type) {
94 default:
95 error("fol1");
96
97 case Tcase:
98 if(c >= 0 && c < 256)
99 if(r1 = r->cases[c])
100 follow[nfollow++] = r1;
101 if(r = r->next)
102 goto loop;
103 break;
104
105 case Talt:
106 case Tor:
107 fol1(r->alt, c);
108 r = r->next;
109 goto loop;
110
111 case Tbegin:
112 if(c == '\n' || c == Cbegin)
113 follow[nfollow++] = r->next;
114 break;
115
116 case Tend:
117 if(c == '\n') {
118 if(r->next == 0) {
119 matched = 1;
120 break;
121 }
122 r = r->next;
123 goto loop;
124 }
125 break;
126
127 case Tclass:
128 if(c >= r->lo && c <= r->hi)
129 follow[nfollow++] = r->next;
130 break;
131 }
132 }
133
134 Rune tab1[] =
135 {
136 0x007f,
137 0x07ff,
138 };
139 Rune tab2[] =
140 {
141 0x003f,
142 0x0fff,
143 };
144
145 Re2
rclass(Rune p0,Rune p1)146 rclass(Rune p0, Rune p1)
147 {
148 char xc0[6], xc1[6];
149 int i, n, m;
150 Re2 x;
151
152 if(p0 > p1)
153 return re2char(0xff, 0xff); // no match
154
155 /*
156 * bust range into same length
157 * character sequences
158 */
159 for(i=0; i<nelem(tab1); i++) {
160 m = tab1[i];
161 if(p0 <= m && p1 > m)
162 return re2or(rclass(p0, m), rclass(m+1, p1));
163 }
164
165 /*
166 * bust range into part of a single page
167 * or into full pages
168 */
169 for(i=0; i<nelem(tab2); i++) {
170 m = tab2[i];
171 if((p0 & ~m) != (p1 & ~m)) {
172 if((p0 & m) != 0)
173 return re2or(rclass(p0, p0|m), rclass((p0|m)+1, p1));
174 if((p1 & m) != m)
175 return re2or(rclass(p0, (p1&~m)-1), rclass(p1&~m, p1));
176 }
177 }
178
179 n = runetochar(xc0, &p0);
180 i = runetochar(xc1, &p1);
181 if(i != n)
182 error("length");
183
184 x = re2char(xc0[0], xc1[0]);
185 for(i=1; i<n; i++)
186 x = re2cat(x, re2char(xc0[i], xc1[i]));
187 return x;
188 }
189
190 int
pcmp(void * va,void * vb)191 pcmp(void *va, void *vb)
192 {
193 int n;
194 Rune *a, *b;
195
196 a = va;
197 b = vb;
198
199 n = a[0] - b[0];
200 if(n)
201 return n;
202 return a[1] - b[1];
203 }
204
205 /*
206 * convert character chass into
207 * run-pair ranges of matches.
208 * this is 10646/utf specific and
209 * needs to be changed for some
210 * other input character set.
211 * this is the key to a fast
212 * regular search of characters
213 * by looking at sequential bytes.
214 */
215 Re2
re2class(char * s)216 re2class(char *s)
217 {
218 Rune pairs[200+2], *p, *q, ov;
219 int nc;
220 Re2 x;
221
222 nc = 0;
223 if(*s == '^') {
224 nc = 1;
225 s++;
226 }
227
228 p = pairs;
229 s += chartorune(p, s);
230 for(;;) {
231 if(*p == '\\')
232 s += chartorune(p, s);
233 if(*p == 0)
234 break;
235 p[1] = *p;
236 p += 2;
237 if(p >= pairs + nelem(pairs) - 2)
238 error("class too big");
239 s += chartorune(p, s);
240 if(*p != '-')
241 continue;
242 s += chartorune(p, s);
243 if(*p == '\\')
244 s += chartorune(p, s);
245 if(*p == 0)
246 break;
247 p[-1] = *p;
248 s += chartorune(p, s);
249 }
250 *p = 0;
251 qsort(pairs, (p-pairs)/2, 2*sizeof(*pairs), pcmp);
252
253 q = pairs;
254 for(p=pairs+2; *p; p+=2) {
255 if(p[0] > p[1])
256 continue;
257 if(p[0] > q[1] || p[1] < q[0]) {
258 q[2] = p[0];
259 q[3] = p[1];
260 q += 2;
261 continue;
262 }
263 if(p[0] < q[0])
264 q[0] = p[0];
265 if(p[1] > q[1])
266 q[1] = p[1];
267 }
268 q[2] = 0;
269
270 p = pairs;
271 if(nc) {
272 x = rclass(0, p[0]-1);
273 ov = p[1]+1;
274 for(p+=2; *p; p+=2) {
275 x = re2or(x, rclass(ov, p[0]-1));
276 ov = p[1]+1;
277 }
278 x = re2or(x, rclass(ov, Runemask));
279 } else {
280 x = rclass(p[0], p[1]);
281 for(p+=2; *p; p+=2)
282 x = re2or(x, rclass(p[0], p[1]));
283 }
284 return x;
285 }
286