1*74a4d8c2SCharles.Forsyth /****************************************************************
2*74a4d8c2SCharles.Forsyth Copyright (C) Lucent Technologies 1997
3*74a4d8c2SCharles.Forsyth All Rights Reserved
4*74a4d8c2SCharles.Forsyth
5*74a4d8c2SCharles.Forsyth Permission to use, copy, modify, and distribute this software and
6*74a4d8c2SCharles.Forsyth its documentation for any purpose and without fee is hereby
7*74a4d8c2SCharles.Forsyth granted, provided that the above copyright notice appear in all
8*74a4d8c2SCharles.Forsyth copies and that both that the copyright notice and this
9*74a4d8c2SCharles.Forsyth permission notice and warranty disclaimer appear in supporting
10*74a4d8c2SCharles.Forsyth documentation, and that the name Lucent Technologies or any of
11*74a4d8c2SCharles.Forsyth its entities not be used in advertising or publicity pertaining
12*74a4d8c2SCharles.Forsyth to distribution of the software without specific, written prior
13*74a4d8c2SCharles.Forsyth permission.
14*74a4d8c2SCharles.Forsyth
15*74a4d8c2SCharles.Forsyth LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16*74a4d8c2SCharles.Forsyth INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17*74a4d8c2SCharles.Forsyth IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18*74a4d8c2SCharles.Forsyth SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19*74a4d8c2SCharles.Forsyth WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20*74a4d8c2SCharles.Forsyth IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21*74a4d8c2SCharles.Forsyth ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22*74a4d8c2SCharles.Forsyth THIS SOFTWARE.
23*74a4d8c2SCharles.Forsyth ****************************************************************/
24*74a4d8c2SCharles.Forsyth
25*74a4d8c2SCharles.Forsyth /* lasciate ogne speranza, voi ch'entrate. */
26*74a4d8c2SCharles.Forsyth
27*74a4d8c2SCharles.Forsyth #define DEBUG
28*74a4d8c2SCharles.Forsyth
29*74a4d8c2SCharles.Forsyth #include <ctype.h>
30*74a4d8c2SCharles.Forsyth #include <stdio.h>
31*74a4d8c2SCharles.Forsyth #include <string.h>
32*74a4d8c2SCharles.Forsyth #include <stdlib.h>
33*74a4d8c2SCharles.Forsyth #include "awk.h"
34*74a4d8c2SCharles.Forsyth #include "ytab.h"
35*74a4d8c2SCharles.Forsyth
36*74a4d8c2SCharles.Forsyth #define HAT (NCHARS-2) /* matches ^ in regular expr */
37*74a4d8c2SCharles.Forsyth /* NCHARS is 2**n */
38*74a4d8c2SCharles.Forsyth #define MAXLIN 22
39*74a4d8c2SCharles.Forsyth
40*74a4d8c2SCharles.Forsyth #define type(v) (v)->nobj /* badly overloaded here */
41*74a4d8c2SCharles.Forsyth #define info(v) (v)->ntype /* badly overloaded here */
42*74a4d8c2SCharles.Forsyth #define left(v) (v)->narg[0]
43*74a4d8c2SCharles.Forsyth #define right(v) (v)->narg[1]
44*74a4d8c2SCharles.Forsyth #define parent(v) (v)->nnext
45*74a4d8c2SCharles.Forsyth
46*74a4d8c2SCharles.Forsyth #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47*74a4d8c2SCharles.Forsyth #define UNARY case STAR: case PLUS: case QUEST:
48*74a4d8c2SCharles.Forsyth
49*74a4d8c2SCharles.Forsyth /* encoding in tree Nodes:
50*74a4d8c2SCharles.Forsyth leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51*74a4d8c2SCharles.Forsyth left is index, right contains value or pointer to value
52*74a4d8c2SCharles.Forsyth unary (STAR, PLUS, QUEST): left is child, right is null
53*74a4d8c2SCharles.Forsyth binary (CAT, OR): left and right are children
54*74a4d8c2SCharles.Forsyth parent contains pointer to parent
55*74a4d8c2SCharles.Forsyth */
56*74a4d8c2SCharles.Forsyth
57*74a4d8c2SCharles.Forsyth
58*74a4d8c2SCharles.Forsyth int *setvec;
59*74a4d8c2SCharles.Forsyth int *tmpset;
60*74a4d8c2SCharles.Forsyth int maxsetvec = 0;
61*74a4d8c2SCharles.Forsyth
62*74a4d8c2SCharles.Forsyth int rtok; /* next token in current re */
63*74a4d8c2SCharles.Forsyth int rlxval;
64*74a4d8c2SCharles.Forsyth static uschar *rlxstr;
65*74a4d8c2SCharles.Forsyth static uschar *prestr; /* current position in current re */
66*74a4d8c2SCharles.Forsyth static uschar *lastre; /* origin of last re */
67*74a4d8c2SCharles.Forsyth
68*74a4d8c2SCharles.Forsyth static int setcnt;
69*74a4d8c2SCharles.Forsyth static int poscnt;
70*74a4d8c2SCharles.Forsyth
71*74a4d8c2SCharles.Forsyth char *patbeg;
72*74a4d8c2SCharles.Forsyth int patlen;
73*74a4d8c2SCharles.Forsyth
74*74a4d8c2SCharles.Forsyth #define NFA 20 /* cache this many dynamic fa's */
75*74a4d8c2SCharles.Forsyth fa *fatab[NFA];
76*74a4d8c2SCharles.Forsyth int nfatab = 0; /* entries in fatab */
77*74a4d8c2SCharles.Forsyth
makedfa(char * s,int anchor)78*74a4d8c2SCharles.Forsyth fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */
79*74a4d8c2SCharles.Forsyth {
80*74a4d8c2SCharles.Forsyth int i, use, nuse;
81*74a4d8c2SCharles.Forsyth fa *pfa;
82*74a4d8c2SCharles.Forsyth static int now = 1;
83*74a4d8c2SCharles.Forsyth
84*74a4d8c2SCharles.Forsyth if (setvec == 0) { /* first time through any RE */
85*74a4d8c2SCharles.Forsyth maxsetvec = MAXLIN;
86*74a4d8c2SCharles.Forsyth setvec = (int *) malloc(maxsetvec * sizeof(int));
87*74a4d8c2SCharles.Forsyth tmpset = (int *) malloc(maxsetvec * sizeof(int));
88*74a4d8c2SCharles.Forsyth if (setvec == 0 || tmpset == 0)
89*74a4d8c2SCharles.Forsyth overflo("out of space initializing makedfa");
90*74a4d8c2SCharles.Forsyth }
91*74a4d8c2SCharles.Forsyth
92*74a4d8c2SCharles.Forsyth if (compile_time) /* a constant for sure */
93*74a4d8c2SCharles.Forsyth return mkdfa(s, anchor);
94*74a4d8c2SCharles.Forsyth for (i = 0; i < nfatab; i++) /* is it there already? */
95*74a4d8c2SCharles.Forsyth if (fatab[i]->anchor == anchor
96*74a4d8c2SCharles.Forsyth && strcmp(fatab[i]->restr, s) == 0) {
97*74a4d8c2SCharles.Forsyth fatab[i]->use = now++;
98*74a4d8c2SCharles.Forsyth return fatab[i];
99*74a4d8c2SCharles.Forsyth }
100*74a4d8c2SCharles.Forsyth pfa = mkdfa(s, anchor);
101*74a4d8c2SCharles.Forsyth if (nfatab < NFA) { /* room for another */
102*74a4d8c2SCharles.Forsyth fatab[nfatab] = pfa;
103*74a4d8c2SCharles.Forsyth fatab[nfatab]->use = now++;
104*74a4d8c2SCharles.Forsyth nfatab++;
105*74a4d8c2SCharles.Forsyth return pfa;
106*74a4d8c2SCharles.Forsyth }
107*74a4d8c2SCharles.Forsyth use = fatab[0]->use; /* replace least-recently used */
108*74a4d8c2SCharles.Forsyth nuse = 0;
109*74a4d8c2SCharles.Forsyth for (i = 1; i < nfatab; i++)
110*74a4d8c2SCharles.Forsyth if (fatab[i]->use < use) {
111*74a4d8c2SCharles.Forsyth use = fatab[i]->use;
112*74a4d8c2SCharles.Forsyth nuse = i;
113*74a4d8c2SCharles.Forsyth }
114*74a4d8c2SCharles.Forsyth freefa(fatab[nuse]);
115*74a4d8c2SCharles.Forsyth fatab[nuse] = pfa;
116*74a4d8c2SCharles.Forsyth pfa->use = now++;
117*74a4d8c2SCharles.Forsyth return pfa;
118*74a4d8c2SCharles.Forsyth }
119*74a4d8c2SCharles.Forsyth
mkdfa(char * s,int anchor)120*74a4d8c2SCharles.Forsyth fa *mkdfa(char *s, int anchor) /* does the real work of making a dfa */
121*74a4d8c2SCharles.Forsyth /* anchor = 1 for anchored matches, else 0 */
122*74a4d8c2SCharles.Forsyth {
123*74a4d8c2SCharles.Forsyth Node *p, *p1;
124*74a4d8c2SCharles.Forsyth fa *f;
125*74a4d8c2SCharles.Forsyth
126*74a4d8c2SCharles.Forsyth p = reparse(s);
127*74a4d8c2SCharles.Forsyth p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
128*74a4d8c2SCharles.Forsyth /* put ALL STAR in front of reg. exp. */
129*74a4d8c2SCharles.Forsyth p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
130*74a4d8c2SCharles.Forsyth /* put FINAL after reg. exp. */
131*74a4d8c2SCharles.Forsyth
132*74a4d8c2SCharles.Forsyth poscnt = 0;
133*74a4d8c2SCharles.Forsyth penter(p1); /* enter parent pointers and leaf indices */
134*74a4d8c2SCharles.Forsyth if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
135*74a4d8c2SCharles.Forsyth overflo("out of space for fa");
136*74a4d8c2SCharles.Forsyth f->accept = poscnt-1; /* penter has computed number of positions in re */
137*74a4d8c2SCharles.Forsyth cfoll(f, p1); /* set up follow sets */
138*74a4d8c2SCharles.Forsyth freetr(p1);
139*74a4d8c2SCharles.Forsyth if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
140*74a4d8c2SCharles.Forsyth overflo("out of space in makedfa");
141*74a4d8c2SCharles.Forsyth if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
142*74a4d8c2SCharles.Forsyth overflo("out of space in makedfa");
143*74a4d8c2SCharles.Forsyth *f->posns[1] = 0;
144*74a4d8c2SCharles.Forsyth f->initstat = makeinit(f, anchor);
145*74a4d8c2SCharles.Forsyth f->anchor = anchor;
146*74a4d8c2SCharles.Forsyth f->restr = (uschar *) tostring(s);
147*74a4d8c2SCharles.Forsyth return f;
148*74a4d8c2SCharles.Forsyth }
149*74a4d8c2SCharles.Forsyth
makeinit(fa * f,int anchor)150*74a4d8c2SCharles.Forsyth int makeinit(fa *f, int anchor)
151*74a4d8c2SCharles.Forsyth {
152*74a4d8c2SCharles.Forsyth int i, k;
153*74a4d8c2SCharles.Forsyth
154*74a4d8c2SCharles.Forsyth f->curstat = 2;
155*74a4d8c2SCharles.Forsyth f->out[2] = 0;
156*74a4d8c2SCharles.Forsyth f->reset = 0;
157*74a4d8c2SCharles.Forsyth k = *(f->re[0].lfollow);
158*74a4d8c2SCharles.Forsyth xfree(f->posns[2]);
159*74a4d8c2SCharles.Forsyth if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
160*74a4d8c2SCharles.Forsyth overflo("out of space in makeinit");
161*74a4d8c2SCharles.Forsyth for (i=0; i <= k; i++) {
162*74a4d8c2SCharles.Forsyth (f->posns[2])[i] = (f->re[0].lfollow)[i];
163*74a4d8c2SCharles.Forsyth }
164*74a4d8c2SCharles.Forsyth if ((f->posns[2])[1] == f->accept)
165*74a4d8c2SCharles.Forsyth f->out[2] = 1;
166*74a4d8c2SCharles.Forsyth for (i=0; i < NCHARS; i++)
167*74a4d8c2SCharles.Forsyth f->gototab[2][i] = 0;
168*74a4d8c2SCharles.Forsyth f->curstat = cgoto(f, 2, HAT);
169*74a4d8c2SCharles.Forsyth if (anchor) {
170*74a4d8c2SCharles.Forsyth *f->posns[2] = k-1; /* leave out position 0 */
171*74a4d8c2SCharles.Forsyth for (i=0; i < k; i++) {
172*74a4d8c2SCharles.Forsyth (f->posns[0])[i] = (f->posns[2])[i];
173*74a4d8c2SCharles.Forsyth }
174*74a4d8c2SCharles.Forsyth
175*74a4d8c2SCharles.Forsyth f->out[0] = f->out[2];
176*74a4d8c2SCharles.Forsyth if (f->curstat != 2)
177*74a4d8c2SCharles.Forsyth --(*f->posns[f->curstat]);
178*74a4d8c2SCharles.Forsyth }
179*74a4d8c2SCharles.Forsyth return f->curstat;
180*74a4d8c2SCharles.Forsyth }
181*74a4d8c2SCharles.Forsyth
penter(Node * p)182*74a4d8c2SCharles.Forsyth void penter(Node *p) /* set up parent pointers and leaf indices */
183*74a4d8c2SCharles.Forsyth {
184*74a4d8c2SCharles.Forsyth switch (type(p)) {
185*74a4d8c2SCharles.Forsyth LEAF
186*74a4d8c2SCharles.Forsyth info(p) = poscnt;
187*74a4d8c2SCharles.Forsyth poscnt++;
188*74a4d8c2SCharles.Forsyth break;
189*74a4d8c2SCharles.Forsyth UNARY
190*74a4d8c2SCharles.Forsyth penter(left(p));
191*74a4d8c2SCharles.Forsyth parent(left(p)) = p;
192*74a4d8c2SCharles.Forsyth break;
193*74a4d8c2SCharles.Forsyth case CAT:
194*74a4d8c2SCharles.Forsyth case OR:
195*74a4d8c2SCharles.Forsyth penter(left(p));
196*74a4d8c2SCharles.Forsyth penter(right(p));
197*74a4d8c2SCharles.Forsyth parent(left(p)) = p;
198*74a4d8c2SCharles.Forsyth parent(right(p)) = p;
199*74a4d8c2SCharles.Forsyth break;
200*74a4d8c2SCharles.Forsyth default: /* can't happen */
201*74a4d8c2SCharles.Forsyth FATAL("can't happen: unknown type %d in penter", type(p));
202*74a4d8c2SCharles.Forsyth break;
203*74a4d8c2SCharles.Forsyth }
204*74a4d8c2SCharles.Forsyth }
205*74a4d8c2SCharles.Forsyth
freetr(Node * p)206*74a4d8c2SCharles.Forsyth void freetr(Node *p) /* free parse tree */
207*74a4d8c2SCharles.Forsyth {
208*74a4d8c2SCharles.Forsyth switch (type(p)) {
209*74a4d8c2SCharles.Forsyth LEAF
210*74a4d8c2SCharles.Forsyth xfree(p);
211*74a4d8c2SCharles.Forsyth break;
212*74a4d8c2SCharles.Forsyth UNARY
213*74a4d8c2SCharles.Forsyth freetr(left(p));
214*74a4d8c2SCharles.Forsyth xfree(p);
215*74a4d8c2SCharles.Forsyth break;
216*74a4d8c2SCharles.Forsyth case CAT:
217*74a4d8c2SCharles.Forsyth case OR:
218*74a4d8c2SCharles.Forsyth freetr(left(p));
219*74a4d8c2SCharles.Forsyth freetr(right(p));
220*74a4d8c2SCharles.Forsyth xfree(p);
221*74a4d8c2SCharles.Forsyth break;
222*74a4d8c2SCharles.Forsyth default: /* can't happen */
223*74a4d8c2SCharles.Forsyth FATAL("can't happen: unknown type %d in freetr", type(p));
224*74a4d8c2SCharles.Forsyth break;
225*74a4d8c2SCharles.Forsyth }
226*74a4d8c2SCharles.Forsyth }
227*74a4d8c2SCharles.Forsyth
228*74a4d8c2SCharles.Forsyth /* in the parsing of regular expressions, metacharacters like . have */
229*74a4d8c2SCharles.Forsyth /* to be seen literally; \056 is not a metacharacter. */
230*74a4d8c2SCharles.Forsyth
hexstr(char ** pp)231*74a4d8c2SCharles.Forsyth int hexstr(char **pp) /* find and eval hex string at pp, return new p */
232*74a4d8c2SCharles.Forsyth { /* only pick up one 8-bit byte (2 chars) */
233*74a4d8c2SCharles.Forsyth uschar *p;
234*74a4d8c2SCharles.Forsyth int n = 0;
235*74a4d8c2SCharles.Forsyth int i;
236*74a4d8c2SCharles.Forsyth
237*74a4d8c2SCharles.Forsyth for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
238*74a4d8c2SCharles.Forsyth if (isdigit(*p))
239*74a4d8c2SCharles.Forsyth n = 16 * n + *p - '0';
240*74a4d8c2SCharles.Forsyth else if (*p >= 'a' && *p <= 'f')
241*74a4d8c2SCharles.Forsyth n = 16 * n + *p - 'a' + 10;
242*74a4d8c2SCharles.Forsyth else if (*p >= 'A' && *p <= 'F')
243*74a4d8c2SCharles.Forsyth n = 16 * n + *p - 'A' + 10;
244*74a4d8c2SCharles.Forsyth }
245*74a4d8c2SCharles.Forsyth *pp = (char *) p;
246*74a4d8c2SCharles.Forsyth return n;
247*74a4d8c2SCharles.Forsyth }
248*74a4d8c2SCharles.Forsyth
249*74a4d8c2SCharles.Forsyth #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
250*74a4d8c2SCharles.Forsyth
quoted(char ** pp)251*74a4d8c2SCharles.Forsyth int quoted(char **pp) /* pick up next thing after a \\ */
252*74a4d8c2SCharles.Forsyth /* and increment *pp */
253*74a4d8c2SCharles.Forsyth {
254*74a4d8c2SCharles.Forsyth char *p = *pp;
255*74a4d8c2SCharles.Forsyth int c;
256*74a4d8c2SCharles.Forsyth
257*74a4d8c2SCharles.Forsyth if ((c = *p++) == 't')
258*74a4d8c2SCharles.Forsyth c = '\t';
259*74a4d8c2SCharles.Forsyth else if (c == 'n')
260*74a4d8c2SCharles.Forsyth c = '\n';
261*74a4d8c2SCharles.Forsyth else if (c == 'f')
262*74a4d8c2SCharles.Forsyth c = '\f';
263*74a4d8c2SCharles.Forsyth else if (c == 'r')
264*74a4d8c2SCharles.Forsyth c = '\r';
265*74a4d8c2SCharles.Forsyth else if (c == 'b')
266*74a4d8c2SCharles.Forsyth c = '\b';
267*74a4d8c2SCharles.Forsyth else if (c == '\\')
268*74a4d8c2SCharles.Forsyth c = '\\';
269*74a4d8c2SCharles.Forsyth else if (c == 'x') { /* hexadecimal goo follows */
270*74a4d8c2SCharles.Forsyth c = hexstr(&p); /* this adds a null if number is invalid */
271*74a4d8c2SCharles.Forsyth } else if (isoctdigit(c)) { /* \d \dd \ddd */
272*74a4d8c2SCharles.Forsyth int n = c - '0';
273*74a4d8c2SCharles.Forsyth if (isoctdigit(*p)) {
274*74a4d8c2SCharles.Forsyth n = 8 * n + *p++ - '0';
275*74a4d8c2SCharles.Forsyth if (isoctdigit(*p))
276*74a4d8c2SCharles.Forsyth n = 8 * n + *p++ - '0';
277*74a4d8c2SCharles.Forsyth }
278*74a4d8c2SCharles.Forsyth c = n;
279*74a4d8c2SCharles.Forsyth } /* else */
280*74a4d8c2SCharles.Forsyth /* c = c; */
281*74a4d8c2SCharles.Forsyth *pp = p;
282*74a4d8c2SCharles.Forsyth return c;
283*74a4d8c2SCharles.Forsyth }
284*74a4d8c2SCharles.Forsyth
cclenter(char * argp)285*74a4d8c2SCharles.Forsyth char *cclenter(char *argp) /* add a character class */
286*74a4d8c2SCharles.Forsyth {
287*74a4d8c2SCharles.Forsyth int i, c, c2;
288*74a4d8c2SCharles.Forsyth uschar *p = (uschar *) argp;
289*74a4d8c2SCharles.Forsyth uschar *op, *bp;
290*74a4d8c2SCharles.Forsyth static uschar *buf = 0;
291*74a4d8c2SCharles.Forsyth static int bufsz = 100;
292*74a4d8c2SCharles.Forsyth
293*74a4d8c2SCharles.Forsyth op = p;
294*74a4d8c2SCharles.Forsyth if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
295*74a4d8c2SCharles.Forsyth FATAL("out of space for character class [%.10s...] 1", p);
296*74a4d8c2SCharles.Forsyth bp = buf;
297*74a4d8c2SCharles.Forsyth for (i = 0; (c = *p++) != 0; ) {
298*74a4d8c2SCharles.Forsyth if (c == '\\') {
299*74a4d8c2SCharles.Forsyth c = quoted((char **) &p);
300*74a4d8c2SCharles.Forsyth } else if (c == '-' && i > 0 && bp[-1] != 0) {
301*74a4d8c2SCharles.Forsyth if (*p != 0) {
302*74a4d8c2SCharles.Forsyth c = bp[-1];
303*74a4d8c2SCharles.Forsyth c2 = *p++;
304*74a4d8c2SCharles.Forsyth if (c2 == '\\')
305*74a4d8c2SCharles.Forsyth c2 = quoted((char **) &p);
306*74a4d8c2SCharles.Forsyth if (c > c2) { /* empty; ignore */
307*74a4d8c2SCharles.Forsyth bp--;
308*74a4d8c2SCharles.Forsyth i--;
309*74a4d8c2SCharles.Forsyth continue;
310*74a4d8c2SCharles.Forsyth }
311*74a4d8c2SCharles.Forsyth while (c < c2) {
312*74a4d8c2SCharles.Forsyth if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
313*74a4d8c2SCharles.Forsyth FATAL("out of space for character class [%.10s...] 2", p);
314*74a4d8c2SCharles.Forsyth *bp++ = ++c;
315*74a4d8c2SCharles.Forsyth i++;
316*74a4d8c2SCharles.Forsyth }
317*74a4d8c2SCharles.Forsyth continue;
318*74a4d8c2SCharles.Forsyth }
319*74a4d8c2SCharles.Forsyth }
320*74a4d8c2SCharles.Forsyth if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
321*74a4d8c2SCharles.Forsyth FATAL("out of space for character class [%.10s...] 3", p);
322*74a4d8c2SCharles.Forsyth *bp++ = c;
323*74a4d8c2SCharles.Forsyth i++;
324*74a4d8c2SCharles.Forsyth }
325*74a4d8c2SCharles.Forsyth *bp = 0;
326*74a4d8c2SCharles.Forsyth dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
327*74a4d8c2SCharles.Forsyth xfree(op);
328*74a4d8c2SCharles.Forsyth return (char *) tostring((char *) buf);
329*74a4d8c2SCharles.Forsyth }
330*74a4d8c2SCharles.Forsyth
overflo(char * s)331*74a4d8c2SCharles.Forsyth void overflo(char *s)
332*74a4d8c2SCharles.Forsyth {
333*74a4d8c2SCharles.Forsyth FATAL("regular expression too big: %.30s...", s);
334*74a4d8c2SCharles.Forsyth }
335*74a4d8c2SCharles.Forsyth
cfoll(fa * f,Node * v)336*74a4d8c2SCharles.Forsyth void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
337*74a4d8c2SCharles.Forsyth {
338*74a4d8c2SCharles.Forsyth int i;
339*74a4d8c2SCharles.Forsyth int *p;
340*74a4d8c2SCharles.Forsyth
341*74a4d8c2SCharles.Forsyth switch (type(v)) {
342*74a4d8c2SCharles.Forsyth LEAF
343*74a4d8c2SCharles.Forsyth f->re[info(v)].ltype = type(v);
344*74a4d8c2SCharles.Forsyth f->re[info(v)].lval.np = right(v);
345*74a4d8c2SCharles.Forsyth while (f->accept >= maxsetvec) { /* guessing here! */
346*74a4d8c2SCharles.Forsyth maxsetvec *= 4;
347*74a4d8c2SCharles.Forsyth setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
348*74a4d8c2SCharles.Forsyth tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
349*74a4d8c2SCharles.Forsyth if (setvec == 0 || tmpset == 0)
350*74a4d8c2SCharles.Forsyth overflo("out of space in cfoll()");
351*74a4d8c2SCharles.Forsyth }
352*74a4d8c2SCharles.Forsyth for (i = 0; i <= f->accept; i++)
353*74a4d8c2SCharles.Forsyth setvec[i] = 0;
354*74a4d8c2SCharles.Forsyth setcnt = 0;
355*74a4d8c2SCharles.Forsyth follow(v); /* computes setvec and setcnt */
356*74a4d8c2SCharles.Forsyth if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
357*74a4d8c2SCharles.Forsyth overflo("out of space building follow set");
358*74a4d8c2SCharles.Forsyth f->re[info(v)].lfollow = p;
359*74a4d8c2SCharles.Forsyth *p = setcnt;
360*74a4d8c2SCharles.Forsyth for (i = f->accept; i >= 0; i--)
361*74a4d8c2SCharles.Forsyth if (setvec[i] == 1)
362*74a4d8c2SCharles.Forsyth *++p = i;
363*74a4d8c2SCharles.Forsyth break;
364*74a4d8c2SCharles.Forsyth UNARY
365*74a4d8c2SCharles.Forsyth cfoll(f,left(v));
366*74a4d8c2SCharles.Forsyth break;
367*74a4d8c2SCharles.Forsyth case CAT:
368*74a4d8c2SCharles.Forsyth case OR:
369*74a4d8c2SCharles.Forsyth cfoll(f,left(v));
370*74a4d8c2SCharles.Forsyth cfoll(f,right(v));
371*74a4d8c2SCharles.Forsyth break;
372*74a4d8c2SCharles.Forsyth default: /* can't happen */
373*74a4d8c2SCharles.Forsyth FATAL("can't happen: unknown type %d in cfoll", type(v));
374*74a4d8c2SCharles.Forsyth }
375*74a4d8c2SCharles.Forsyth }
376*74a4d8c2SCharles.Forsyth
first(Node * p)377*74a4d8c2SCharles.Forsyth int first(Node *p) /* collects initially active leaves of p into setvec */
378*74a4d8c2SCharles.Forsyth /* returns 1 if p matches empty string */
379*74a4d8c2SCharles.Forsyth {
380*74a4d8c2SCharles.Forsyth int b, lp;
381*74a4d8c2SCharles.Forsyth
382*74a4d8c2SCharles.Forsyth switch (type(p)) {
383*74a4d8c2SCharles.Forsyth LEAF
384*74a4d8c2SCharles.Forsyth lp = info(p); /* look for high-water mark of subscripts */
385*74a4d8c2SCharles.Forsyth while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
386*74a4d8c2SCharles.Forsyth maxsetvec *= 4;
387*74a4d8c2SCharles.Forsyth setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
388*74a4d8c2SCharles.Forsyth tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
389*74a4d8c2SCharles.Forsyth if (setvec == 0 || tmpset == 0)
390*74a4d8c2SCharles.Forsyth overflo("out of space in first()");
391*74a4d8c2SCharles.Forsyth }
392*74a4d8c2SCharles.Forsyth if (setvec[lp] != 1) {
393*74a4d8c2SCharles.Forsyth setvec[lp] = 1;
394*74a4d8c2SCharles.Forsyth setcnt++;
395*74a4d8c2SCharles.Forsyth }
396*74a4d8c2SCharles.Forsyth if (type(p) == CCL && (*(char *) right(p)) == '\0')
397*74a4d8c2SCharles.Forsyth return(0); /* empty CCL */
398*74a4d8c2SCharles.Forsyth else return(1);
399*74a4d8c2SCharles.Forsyth case PLUS:
400*74a4d8c2SCharles.Forsyth if (first(left(p)) == 0) return(0);
401*74a4d8c2SCharles.Forsyth return(1);
402*74a4d8c2SCharles.Forsyth case STAR:
403*74a4d8c2SCharles.Forsyth case QUEST:
404*74a4d8c2SCharles.Forsyth first(left(p));
405*74a4d8c2SCharles.Forsyth return(0);
406*74a4d8c2SCharles.Forsyth case CAT:
407*74a4d8c2SCharles.Forsyth if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
408*74a4d8c2SCharles.Forsyth return(1);
409*74a4d8c2SCharles.Forsyth case OR:
410*74a4d8c2SCharles.Forsyth b = first(right(p));
411*74a4d8c2SCharles.Forsyth if (first(left(p)) == 0 || b == 0) return(0);
412*74a4d8c2SCharles.Forsyth return(1);
413*74a4d8c2SCharles.Forsyth }
414*74a4d8c2SCharles.Forsyth FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
415*74a4d8c2SCharles.Forsyth return(-1);
416*74a4d8c2SCharles.Forsyth }
417*74a4d8c2SCharles.Forsyth
follow(Node * v)418*74a4d8c2SCharles.Forsyth void follow(Node *v) /* collects leaves that can follow v into setvec */
419*74a4d8c2SCharles.Forsyth {
420*74a4d8c2SCharles.Forsyth Node *p;
421*74a4d8c2SCharles.Forsyth
422*74a4d8c2SCharles.Forsyth if (type(v) == FINAL)
423*74a4d8c2SCharles.Forsyth return;
424*74a4d8c2SCharles.Forsyth p = parent(v);
425*74a4d8c2SCharles.Forsyth switch (type(p)) {
426*74a4d8c2SCharles.Forsyth case STAR:
427*74a4d8c2SCharles.Forsyth case PLUS:
428*74a4d8c2SCharles.Forsyth first(v);
429*74a4d8c2SCharles.Forsyth follow(p);
430*74a4d8c2SCharles.Forsyth return;
431*74a4d8c2SCharles.Forsyth
432*74a4d8c2SCharles.Forsyth case OR:
433*74a4d8c2SCharles.Forsyth case QUEST:
434*74a4d8c2SCharles.Forsyth follow(p);
435*74a4d8c2SCharles.Forsyth return;
436*74a4d8c2SCharles.Forsyth
437*74a4d8c2SCharles.Forsyth case CAT:
438*74a4d8c2SCharles.Forsyth if (v == left(p)) { /* v is left child of p */
439*74a4d8c2SCharles.Forsyth if (first(right(p)) == 0) {
440*74a4d8c2SCharles.Forsyth follow(p);
441*74a4d8c2SCharles.Forsyth return;
442*74a4d8c2SCharles.Forsyth }
443*74a4d8c2SCharles.Forsyth } else /* v is right child */
444*74a4d8c2SCharles.Forsyth follow(p);
445*74a4d8c2SCharles.Forsyth return;
446*74a4d8c2SCharles.Forsyth }
447*74a4d8c2SCharles.Forsyth }
448*74a4d8c2SCharles.Forsyth
member(int c,char * sarg)449*74a4d8c2SCharles.Forsyth int member(int c, char *sarg) /* is c in s? */
450*74a4d8c2SCharles.Forsyth {
451*74a4d8c2SCharles.Forsyth uschar *s = (uschar *) sarg;
452*74a4d8c2SCharles.Forsyth
453*74a4d8c2SCharles.Forsyth while (*s)
454*74a4d8c2SCharles.Forsyth if (c == *s++)
455*74a4d8c2SCharles.Forsyth return(1);
456*74a4d8c2SCharles.Forsyth return(0);
457*74a4d8c2SCharles.Forsyth }
458*74a4d8c2SCharles.Forsyth
match(fa * f,char * p0)459*74a4d8c2SCharles.Forsyth int match(fa *f, char *p0) /* shortest match ? */
460*74a4d8c2SCharles.Forsyth {
461*74a4d8c2SCharles.Forsyth int s, ns;
462*74a4d8c2SCharles.Forsyth uschar *p = (uschar *) p0;
463*74a4d8c2SCharles.Forsyth
464*74a4d8c2SCharles.Forsyth s = f->reset ? makeinit(f,0) : f->initstat;
465*74a4d8c2SCharles.Forsyth if (f->out[s])
466*74a4d8c2SCharles.Forsyth return(1);
467*74a4d8c2SCharles.Forsyth do {
468*74a4d8c2SCharles.Forsyth if ((ns = f->gototab[s][*p]) != 0)
469*74a4d8c2SCharles.Forsyth s = ns;
470*74a4d8c2SCharles.Forsyth else
471*74a4d8c2SCharles.Forsyth s = cgoto(f, s, *p);
472*74a4d8c2SCharles.Forsyth if (f->out[s])
473*74a4d8c2SCharles.Forsyth return(1);
474*74a4d8c2SCharles.Forsyth } while (*p++ != 0);
475*74a4d8c2SCharles.Forsyth return(0);
476*74a4d8c2SCharles.Forsyth }
477*74a4d8c2SCharles.Forsyth
pmatch(fa * f,char * p0)478*74a4d8c2SCharles.Forsyth int pmatch(fa *f, char *p0) /* longest match, for sub */
479*74a4d8c2SCharles.Forsyth {
480*74a4d8c2SCharles.Forsyth int s, ns;
481*74a4d8c2SCharles.Forsyth uschar *p = (uschar *) p0;
482*74a4d8c2SCharles.Forsyth uschar *q;
483*74a4d8c2SCharles.Forsyth int i, k;
484*74a4d8c2SCharles.Forsyth
485*74a4d8c2SCharles.Forsyth s = f->reset ? makeinit(f,1) : f->initstat;
486*74a4d8c2SCharles.Forsyth patbeg = (char *) p;
487*74a4d8c2SCharles.Forsyth patlen = -1;
488*74a4d8c2SCharles.Forsyth do {
489*74a4d8c2SCharles.Forsyth q = p;
490*74a4d8c2SCharles.Forsyth do {
491*74a4d8c2SCharles.Forsyth if (f->out[s]) /* final state */
492*74a4d8c2SCharles.Forsyth patlen = q-p;
493*74a4d8c2SCharles.Forsyth if ((ns = f->gototab[s][*q]) != 0)
494*74a4d8c2SCharles.Forsyth s = ns;
495*74a4d8c2SCharles.Forsyth else
496*74a4d8c2SCharles.Forsyth s = cgoto(f, s, *q);
497*74a4d8c2SCharles.Forsyth if (s == 1) { /* no transition */
498*74a4d8c2SCharles.Forsyth if (patlen >= 0) {
499*74a4d8c2SCharles.Forsyth patbeg = (char *) p;
500*74a4d8c2SCharles.Forsyth return(1);
501*74a4d8c2SCharles.Forsyth }
502*74a4d8c2SCharles.Forsyth else
503*74a4d8c2SCharles.Forsyth goto nextin; /* no match */
504*74a4d8c2SCharles.Forsyth }
505*74a4d8c2SCharles.Forsyth } while (*q++ != 0);
506*74a4d8c2SCharles.Forsyth if (f->out[s])
507*74a4d8c2SCharles.Forsyth patlen = q-p-1; /* don't count $ */
508*74a4d8c2SCharles.Forsyth if (patlen >= 0) {
509*74a4d8c2SCharles.Forsyth patbeg = (char *) p;
510*74a4d8c2SCharles.Forsyth return(1);
511*74a4d8c2SCharles.Forsyth }
512*74a4d8c2SCharles.Forsyth nextin:
513*74a4d8c2SCharles.Forsyth s = 2;
514*74a4d8c2SCharles.Forsyth if (f->reset) {
515*74a4d8c2SCharles.Forsyth for (i = 2; i <= f->curstat; i++)
516*74a4d8c2SCharles.Forsyth xfree(f->posns[i]);
517*74a4d8c2SCharles.Forsyth k = *f->posns[0];
518*74a4d8c2SCharles.Forsyth if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
519*74a4d8c2SCharles.Forsyth overflo("out of space in pmatch");
520*74a4d8c2SCharles.Forsyth for (i = 0; i <= k; i++)
521*74a4d8c2SCharles.Forsyth (f->posns[2])[i] = (f->posns[0])[i];
522*74a4d8c2SCharles.Forsyth f->initstat = f->curstat = 2;
523*74a4d8c2SCharles.Forsyth f->out[2] = f->out[0];
524*74a4d8c2SCharles.Forsyth for (i = 0; i < NCHARS; i++)
525*74a4d8c2SCharles.Forsyth f->gototab[2][i] = 0;
526*74a4d8c2SCharles.Forsyth }
527*74a4d8c2SCharles.Forsyth } while (*p++ != 0);
528*74a4d8c2SCharles.Forsyth return (0);
529*74a4d8c2SCharles.Forsyth }
530*74a4d8c2SCharles.Forsyth
nematch(fa * f,char * p0)531*74a4d8c2SCharles.Forsyth int nematch(fa *f, char *p0) /* non-empty match, for sub */
532*74a4d8c2SCharles.Forsyth {
533*74a4d8c2SCharles.Forsyth int s, ns;
534*74a4d8c2SCharles.Forsyth uschar *p = (uschar *) p0;
535*74a4d8c2SCharles.Forsyth uschar *q;
536*74a4d8c2SCharles.Forsyth int i, k;
537*74a4d8c2SCharles.Forsyth
538*74a4d8c2SCharles.Forsyth s = f->reset ? makeinit(f,1) : f->initstat;
539*74a4d8c2SCharles.Forsyth patlen = -1;
540*74a4d8c2SCharles.Forsyth while (*p) {
541*74a4d8c2SCharles.Forsyth q = p;
542*74a4d8c2SCharles.Forsyth do {
543*74a4d8c2SCharles.Forsyth if (f->out[s]) /* final state */
544*74a4d8c2SCharles.Forsyth patlen = q-p;
545*74a4d8c2SCharles.Forsyth if ((ns = f->gototab[s][*q]) != 0)
546*74a4d8c2SCharles.Forsyth s = ns;
547*74a4d8c2SCharles.Forsyth else
548*74a4d8c2SCharles.Forsyth s = cgoto(f, s, *q);
549*74a4d8c2SCharles.Forsyth if (s == 1) { /* no transition */
550*74a4d8c2SCharles.Forsyth if (patlen > 0) {
551*74a4d8c2SCharles.Forsyth patbeg = (char *) p;
552*74a4d8c2SCharles.Forsyth return(1);
553*74a4d8c2SCharles.Forsyth } else
554*74a4d8c2SCharles.Forsyth goto nnextin; /* no nonempty match */
555*74a4d8c2SCharles.Forsyth }
556*74a4d8c2SCharles.Forsyth } while (*q++ != 0);
557*74a4d8c2SCharles.Forsyth if (f->out[s])
558*74a4d8c2SCharles.Forsyth patlen = q-p-1; /* don't count $ */
559*74a4d8c2SCharles.Forsyth if (patlen > 0 ) {
560*74a4d8c2SCharles.Forsyth patbeg = (char *) p;
561*74a4d8c2SCharles.Forsyth return(1);
562*74a4d8c2SCharles.Forsyth }
563*74a4d8c2SCharles.Forsyth nnextin:
564*74a4d8c2SCharles.Forsyth s = 2;
565*74a4d8c2SCharles.Forsyth if (f->reset) {
566*74a4d8c2SCharles.Forsyth for (i = 2; i <= f->curstat; i++)
567*74a4d8c2SCharles.Forsyth xfree(f->posns[i]);
568*74a4d8c2SCharles.Forsyth k = *f->posns[0];
569*74a4d8c2SCharles.Forsyth if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
570*74a4d8c2SCharles.Forsyth overflo("out of state space");
571*74a4d8c2SCharles.Forsyth for (i = 0; i <= k; i++)
572*74a4d8c2SCharles.Forsyth (f->posns[2])[i] = (f->posns[0])[i];
573*74a4d8c2SCharles.Forsyth f->initstat = f->curstat = 2;
574*74a4d8c2SCharles.Forsyth f->out[2] = f->out[0];
575*74a4d8c2SCharles.Forsyth for (i = 0; i < NCHARS; i++)
576*74a4d8c2SCharles.Forsyth f->gototab[2][i] = 0;
577*74a4d8c2SCharles.Forsyth }
578*74a4d8c2SCharles.Forsyth p++;
579*74a4d8c2SCharles.Forsyth }
580*74a4d8c2SCharles.Forsyth return (0);
581*74a4d8c2SCharles.Forsyth }
582*74a4d8c2SCharles.Forsyth
reparse(char * p)583*74a4d8c2SCharles.Forsyth Node *reparse(char *p) /* parses regular expression pointed to by p */
584*74a4d8c2SCharles.Forsyth { /* uses relex() to scan regular expression */
585*74a4d8c2SCharles.Forsyth Node *np;
586*74a4d8c2SCharles.Forsyth
587*74a4d8c2SCharles.Forsyth dprintf( ("reparse <%s>\n", p) );
588*74a4d8c2SCharles.Forsyth lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
589*74a4d8c2SCharles.Forsyth rtok = relex();
590*74a4d8c2SCharles.Forsyth if (rtok == '\0')
591*74a4d8c2SCharles.Forsyth FATAL("empty regular expression");
592*74a4d8c2SCharles.Forsyth np = regexp();
593*74a4d8c2SCharles.Forsyth if (rtok != '\0')
594*74a4d8c2SCharles.Forsyth FATAL("syntax error in regular expression %s at %s", lastre, prestr);
595*74a4d8c2SCharles.Forsyth return(np);
596*74a4d8c2SCharles.Forsyth }
597*74a4d8c2SCharles.Forsyth
regexp(void)598*74a4d8c2SCharles.Forsyth Node *regexp(void) /* top-level parse of reg expr */
599*74a4d8c2SCharles.Forsyth {
600*74a4d8c2SCharles.Forsyth return (alt(concat(primary())));
601*74a4d8c2SCharles.Forsyth }
602*74a4d8c2SCharles.Forsyth
primary(void)603*74a4d8c2SCharles.Forsyth Node *primary(void)
604*74a4d8c2SCharles.Forsyth {
605*74a4d8c2SCharles.Forsyth Node *np;
606*74a4d8c2SCharles.Forsyth
607*74a4d8c2SCharles.Forsyth switch (rtok) {
608*74a4d8c2SCharles.Forsyth case CHAR:
609*74a4d8c2SCharles.Forsyth np = op2(CHAR, NIL, itonp(rlxval));
610*74a4d8c2SCharles.Forsyth rtok = relex();
611*74a4d8c2SCharles.Forsyth return (unary(np));
612*74a4d8c2SCharles.Forsyth case ALL:
613*74a4d8c2SCharles.Forsyth rtok = relex();
614*74a4d8c2SCharles.Forsyth return (unary(op2(ALL, NIL, NIL)));
615*74a4d8c2SCharles.Forsyth case DOT:
616*74a4d8c2SCharles.Forsyth rtok = relex();
617*74a4d8c2SCharles.Forsyth return (unary(op2(DOT, NIL, NIL)));
618*74a4d8c2SCharles.Forsyth case CCL:
619*74a4d8c2SCharles.Forsyth np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
620*74a4d8c2SCharles.Forsyth rtok = relex();
621*74a4d8c2SCharles.Forsyth return (unary(np));
622*74a4d8c2SCharles.Forsyth case NCCL:
623*74a4d8c2SCharles.Forsyth np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
624*74a4d8c2SCharles.Forsyth rtok = relex();
625*74a4d8c2SCharles.Forsyth return (unary(np));
626*74a4d8c2SCharles.Forsyth case '^':
627*74a4d8c2SCharles.Forsyth rtok = relex();
628*74a4d8c2SCharles.Forsyth return (unary(op2(CHAR, NIL, itonp(HAT))));
629*74a4d8c2SCharles.Forsyth case '$':
630*74a4d8c2SCharles.Forsyth rtok = relex();
631*74a4d8c2SCharles.Forsyth return (unary(op2(CHAR, NIL, NIL)));
632*74a4d8c2SCharles.Forsyth case '(':
633*74a4d8c2SCharles.Forsyth rtok = relex();
634*74a4d8c2SCharles.Forsyth if (rtok == ')') { /* special pleading for () */
635*74a4d8c2SCharles.Forsyth rtok = relex();
636*74a4d8c2SCharles.Forsyth return unary(op2(CCL, NIL, (Node *) tostring("")));
637*74a4d8c2SCharles.Forsyth }
638*74a4d8c2SCharles.Forsyth np = regexp();
639*74a4d8c2SCharles.Forsyth if (rtok == ')') {
640*74a4d8c2SCharles.Forsyth rtok = relex();
641*74a4d8c2SCharles.Forsyth return (unary(np));
642*74a4d8c2SCharles.Forsyth }
643*74a4d8c2SCharles.Forsyth else
644*74a4d8c2SCharles.Forsyth FATAL("syntax error in regular expression %s at %s", lastre, prestr);
645*74a4d8c2SCharles.Forsyth default:
646*74a4d8c2SCharles.Forsyth FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
647*74a4d8c2SCharles.Forsyth }
648*74a4d8c2SCharles.Forsyth return 0; /*NOTREACHED*/
649*74a4d8c2SCharles.Forsyth }
650*74a4d8c2SCharles.Forsyth
concat(Node * np)651*74a4d8c2SCharles.Forsyth Node *concat(Node *np)
652*74a4d8c2SCharles.Forsyth {
653*74a4d8c2SCharles.Forsyth switch (rtok) {
654*74a4d8c2SCharles.Forsyth case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
655*74a4d8c2SCharles.Forsyth return (concat(op2(CAT, np, primary())));
656*74a4d8c2SCharles.Forsyth }
657*74a4d8c2SCharles.Forsyth return (np);
658*74a4d8c2SCharles.Forsyth }
659*74a4d8c2SCharles.Forsyth
alt(Node * np)660*74a4d8c2SCharles.Forsyth Node *alt(Node *np)
661*74a4d8c2SCharles.Forsyth {
662*74a4d8c2SCharles.Forsyth if (rtok == OR) {
663*74a4d8c2SCharles.Forsyth rtok = relex();
664*74a4d8c2SCharles.Forsyth return (alt(op2(OR, np, concat(primary()))));
665*74a4d8c2SCharles.Forsyth }
666*74a4d8c2SCharles.Forsyth return (np);
667*74a4d8c2SCharles.Forsyth }
668*74a4d8c2SCharles.Forsyth
unary(Node * np)669*74a4d8c2SCharles.Forsyth Node *unary(Node *np)
670*74a4d8c2SCharles.Forsyth {
671*74a4d8c2SCharles.Forsyth switch (rtok) {
672*74a4d8c2SCharles.Forsyth case STAR:
673*74a4d8c2SCharles.Forsyth rtok = relex();
674*74a4d8c2SCharles.Forsyth return (unary(op2(STAR, np, NIL)));
675*74a4d8c2SCharles.Forsyth case PLUS:
676*74a4d8c2SCharles.Forsyth rtok = relex();
677*74a4d8c2SCharles.Forsyth return (unary(op2(PLUS, np, NIL)));
678*74a4d8c2SCharles.Forsyth case QUEST:
679*74a4d8c2SCharles.Forsyth rtok = relex();
680*74a4d8c2SCharles.Forsyth return (unary(op2(QUEST, np, NIL)));
681*74a4d8c2SCharles.Forsyth default:
682*74a4d8c2SCharles.Forsyth return (np);
683*74a4d8c2SCharles.Forsyth }
684*74a4d8c2SCharles.Forsyth }
685*74a4d8c2SCharles.Forsyth
relex(void)686*74a4d8c2SCharles.Forsyth int relex(void) /* lexical analyzer for reparse */
687*74a4d8c2SCharles.Forsyth {
688*74a4d8c2SCharles.Forsyth int c, n;
689*74a4d8c2SCharles.Forsyth int cflag;
690*74a4d8c2SCharles.Forsyth static uschar *buf = 0;
691*74a4d8c2SCharles.Forsyth static int bufsz = 100;
692*74a4d8c2SCharles.Forsyth uschar *bp;
693*74a4d8c2SCharles.Forsyth
694*74a4d8c2SCharles.Forsyth switch (c = *prestr++) {
695*74a4d8c2SCharles.Forsyth case '|': return OR;
696*74a4d8c2SCharles.Forsyth case '*': return STAR;
697*74a4d8c2SCharles.Forsyth case '+': return PLUS;
698*74a4d8c2SCharles.Forsyth case '?': return QUEST;
699*74a4d8c2SCharles.Forsyth case '.': return DOT;
700*74a4d8c2SCharles.Forsyth case '\0': prestr--; return '\0';
701*74a4d8c2SCharles.Forsyth case '^':
702*74a4d8c2SCharles.Forsyth case '$':
703*74a4d8c2SCharles.Forsyth case '(':
704*74a4d8c2SCharles.Forsyth case ')':
705*74a4d8c2SCharles.Forsyth return c;
706*74a4d8c2SCharles.Forsyth case '\\':
707*74a4d8c2SCharles.Forsyth rlxval = quoted((char **) &prestr);
708*74a4d8c2SCharles.Forsyth return CHAR;
709*74a4d8c2SCharles.Forsyth default:
710*74a4d8c2SCharles.Forsyth rlxval = c;
711*74a4d8c2SCharles.Forsyth return CHAR;
712*74a4d8c2SCharles.Forsyth case '[':
713*74a4d8c2SCharles.Forsyth if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
714*74a4d8c2SCharles.Forsyth FATAL("out of space in reg expr %.10s..", lastre);
715*74a4d8c2SCharles.Forsyth bp = buf;
716*74a4d8c2SCharles.Forsyth if (*prestr == '^') {
717*74a4d8c2SCharles.Forsyth cflag = 1;
718*74a4d8c2SCharles.Forsyth prestr++;
719*74a4d8c2SCharles.Forsyth }
720*74a4d8c2SCharles.Forsyth else
721*74a4d8c2SCharles.Forsyth cflag = 0;
722*74a4d8c2SCharles.Forsyth n = 2 * strlen(prestr)+1;
723*74a4d8c2SCharles.Forsyth if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
724*74a4d8c2SCharles.Forsyth FATAL("out of space for reg expr %.10s...", lastre);
725*74a4d8c2SCharles.Forsyth for (; ; ) {
726*74a4d8c2SCharles.Forsyth if ((c = *prestr++) == '\\') {
727*74a4d8c2SCharles.Forsyth *bp++ = '\\';
728*74a4d8c2SCharles.Forsyth if ((c = *prestr++) == '\0')
729*74a4d8c2SCharles.Forsyth FATAL("nonterminated character class %.20s...", lastre);
730*74a4d8c2SCharles.Forsyth *bp++ = c;
731*74a4d8c2SCharles.Forsyth /* } else if (c == '\n') { */
732*74a4d8c2SCharles.Forsyth /* FATAL("newline in character class %.20s...", lastre); */
733*74a4d8c2SCharles.Forsyth } else if (c == '\0') {
734*74a4d8c2SCharles.Forsyth FATAL("nonterminated character class %.20s", lastre);
735*74a4d8c2SCharles.Forsyth } else if (bp == buf) { /* 1st char is special */
736*74a4d8c2SCharles.Forsyth *bp++ = c;
737*74a4d8c2SCharles.Forsyth } else if (c == ']') {
738*74a4d8c2SCharles.Forsyth *bp++ = 0;
739*74a4d8c2SCharles.Forsyth rlxstr = (uschar *) tostring((char *) buf);
740*74a4d8c2SCharles.Forsyth if (cflag == 0)
741*74a4d8c2SCharles.Forsyth return CCL;
742*74a4d8c2SCharles.Forsyth else
743*74a4d8c2SCharles.Forsyth return NCCL;
744*74a4d8c2SCharles.Forsyth } else
745*74a4d8c2SCharles.Forsyth *bp++ = c;
746*74a4d8c2SCharles.Forsyth }
747*74a4d8c2SCharles.Forsyth }
748*74a4d8c2SCharles.Forsyth }
749*74a4d8c2SCharles.Forsyth
cgoto(fa * f,int s,int c)750*74a4d8c2SCharles.Forsyth int cgoto(fa *f, int s, int c)
751*74a4d8c2SCharles.Forsyth {
752*74a4d8c2SCharles.Forsyth int i, j, k;
753*74a4d8c2SCharles.Forsyth int *p, *q;
754*74a4d8c2SCharles.Forsyth
755*74a4d8c2SCharles.Forsyth if (c < 0 || c > 255)
756*74a4d8c2SCharles.Forsyth FATAL("can't happen: neg char %d in cgoto", c);
757*74a4d8c2SCharles.Forsyth while (f->accept >= maxsetvec) { /* guessing here! */
758*74a4d8c2SCharles.Forsyth maxsetvec *= 4;
759*74a4d8c2SCharles.Forsyth setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
760*74a4d8c2SCharles.Forsyth tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
761*74a4d8c2SCharles.Forsyth if (setvec == 0 || tmpset == 0)
762*74a4d8c2SCharles.Forsyth overflo("out of space in cgoto()");
763*74a4d8c2SCharles.Forsyth }
764*74a4d8c2SCharles.Forsyth for (i = 0; i <= f->accept; i++)
765*74a4d8c2SCharles.Forsyth setvec[i] = 0;
766*74a4d8c2SCharles.Forsyth setcnt = 0;
767*74a4d8c2SCharles.Forsyth /* compute positions of gototab[s,c] into setvec */
768*74a4d8c2SCharles.Forsyth p = f->posns[s];
769*74a4d8c2SCharles.Forsyth for (i = 1; i <= *p; i++) {
770*74a4d8c2SCharles.Forsyth if ((k = f->re[p[i]].ltype) != FINAL) {
771*74a4d8c2SCharles.Forsyth if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
772*74a4d8c2SCharles.Forsyth || (k == DOT && c != 0 && c != HAT)
773*74a4d8c2SCharles.Forsyth || (k == ALL && c != 0)
774*74a4d8c2SCharles.Forsyth || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
775*74a4d8c2SCharles.Forsyth || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
776*74a4d8c2SCharles.Forsyth q = f->re[p[i]].lfollow;
777*74a4d8c2SCharles.Forsyth for (j = 1; j <= *q; j++) {
778*74a4d8c2SCharles.Forsyth if (q[j] >= maxsetvec) {
779*74a4d8c2SCharles.Forsyth maxsetvec *= 4;
780*74a4d8c2SCharles.Forsyth setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
781*74a4d8c2SCharles.Forsyth tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
782*74a4d8c2SCharles.Forsyth if (setvec == 0 || tmpset == 0)
783*74a4d8c2SCharles.Forsyth overflo("cgoto overflow");
784*74a4d8c2SCharles.Forsyth }
785*74a4d8c2SCharles.Forsyth if (setvec[q[j]] == 0) {
786*74a4d8c2SCharles.Forsyth setcnt++;
787*74a4d8c2SCharles.Forsyth setvec[q[j]] = 1;
788*74a4d8c2SCharles.Forsyth }
789*74a4d8c2SCharles.Forsyth }
790*74a4d8c2SCharles.Forsyth }
791*74a4d8c2SCharles.Forsyth }
792*74a4d8c2SCharles.Forsyth }
793*74a4d8c2SCharles.Forsyth /* determine if setvec is a previous state */
794*74a4d8c2SCharles.Forsyth tmpset[0] = setcnt;
795*74a4d8c2SCharles.Forsyth j = 1;
796*74a4d8c2SCharles.Forsyth for (i = f->accept; i >= 0; i--)
797*74a4d8c2SCharles.Forsyth if (setvec[i]) {
798*74a4d8c2SCharles.Forsyth tmpset[j++] = i;
799*74a4d8c2SCharles.Forsyth }
800*74a4d8c2SCharles.Forsyth /* tmpset == previous state? */
801*74a4d8c2SCharles.Forsyth for (i = 1; i <= f->curstat; i++) {
802*74a4d8c2SCharles.Forsyth p = f->posns[i];
803*74a4d8c2SCharles.Forsyth if ((k = tmpset[0]) != p[0])
804*74a4d8c2SCharles.Forsyth goto different;
805*74a4d8c2SCharles.Forsyth for (j = 1; j <= k; j++)
806*74a4d8c2SCharles.Forsyth if (tmpset[j] != p[j])
807*74a4d8c2SCharles.Forsyth goto different;
808*74a4d8c2SCharles.Forsyth /* setvec is state i */
809*74a4d8c2SCharles.Forsyth f->gototab[s][c] = i;
810*74a4d8c2SCharles.Forsyth return i;
811*74a4d8c2SCharles.Forsyth different:;
812*74a4d8c2SCharles.Forsyth }
813*74a4d8c2SCharles.Forsyth
814*74a4d8c2SCharles.Forsyth /* add tmpset to current set of states */
815*74a4d8c2SCharles.Forsyth if (f->curstat >= NSTATES-1) {
816*74a4d8c2SCharles.Forsyth f->curstat = 2;
817*74a4d8c2SCharles.Forsyth f->reset = 1;
818*74a4d8c2SCharles.Forsyth for (i = 2; i < NSTATES; i++)
819*74a4d8c2SCharles.Forsyth xfree(f->posns[i]);
820*74a4d8c2SCharles.Forsyth } else
821*74a4d8c2SCharles.Forsyth ++(f->curstat);
822*74a4d8c2SCharles.Forsyth for (i = 0; i < NCHARS; i++)
823*74a4d8c2SCharles.Forsyth f->gototab[f->curstat][i] = 0;
824*74a4d8c2SCharles.Forsyth xfree(f->posns[f->curstat]);
825*74a4d8c2SCharles.Forsyth if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
826*74a4d8c2SCharles.Forsyth overflo("out of space in cgoto");
827*74a4d8c2SCharles.Forsyth
828*74a4d8c2SCharles.Forsyth f->posns[f->curstat] = p;
829*74a4d8c2SCharles.Forsyth f->gototab[s][c] = f->curstat;
830*74a4d8c2SCharles.Forsyth for (i = 0; i <= setcnt; i++)
831*74a4d8c2SCharles.Forsyth p[i] = tmpset[i];
832*74a4d8c2SCharles.Forsyth if (setvec[f->accept])
833*74a4d8c2SCharles.Forsyth f->out[f->curstat] = 1;
834*74a4d8c2SCharles.Forsyth else
835*74a4d8c2SCharles.Forsyth f->out[f->curstat] = 0;
836*74a4d8c2SCharles.Forsyth return f->curstat;
837*74a4d8c2SCharles.Forsyth }
838*74a4d8c2SCharles.Forsyth
839*74a4d8c2SCharles.Forsyth
freefa(fa * f)840*74a4d8c2SCharles.Forsyth void freefa(fa *f) /* free a finite automaton */
841*74a4d8c2SCharles.Forsyth {
842*74a4d8c2SCharles.Forsyth int i;
843*74a4d8c2SCharles.Forsyth
844*74a4d8c2SCharles.Forsyth if (f == NULL)
845*74a4d8c2SCharles.Forsyth return;
846*74a4d8c2SCharles.Forsyth for (i = 0; i <= f->curstat; i++)
847*74a4d8c2SCharles.Forsyth xfree(f->posns[i]);
848*74a4d8c2SCharles.Forsyth for (i = 0; i <= f->accept; i++) {
849*74a4d8c2SCharles.Forsyth xfree(f->re[i].lfollow);
850*74a4d8c2SCharles.Forsyth if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
851*74a4d8c2SCharles.Forsyth xfree((f->re[i].lval.np));
852*74a4d8c2SCharles.Forsyth }
853*74a4d8c2SCharles.Forsyth xfree(f->restr);
854*74a4d8c2SCharles.Forsyth xfree(f);
855*74a4d8c2SCharles.Forsyth }
856