xref: /inferno-os/utils/awk/b.c (revision 74a4d8c26dd3c1e9febcb717cfd6cb6512991a7a)
1*74a4d8c2SCharles.Forsyth /****************************************************************
2*74a4d8c2SCharles.Forsyth Copyright (C) Lucent Technologies 1997
3*74a4d8c2SCharles.Forsyth All Rights Reserved
4*74a4d8c2SCharles.Forsyth 
5*74a4d8c2SCharles.Forsyth Permission to use, copy, modify, and distribute this software and
6*74a4d8c2SCharles.Forsyth its documentation for any purpose and without fee is hereby
7*74a4d8c2SCharles.Forsyth granted, provided that the above copyright notice appear in all
8*74a4d8c2SCharles.Forsyth copies and that both that the copyright notice and this
9*74a4d8c2SCharles.Forsyth permission notice and warranty disclaimer appear in supporting
10*74a4d8c2SCharles.Forsyth documentation, and that the name Lucent Technologies or any of
11*74a4d8c2SCharles.Forsyth its entities not be used in advertising or publicity pertaining
12*74a4d8c2SCharles.Forsyth to distribution of the software without specific, written prior
13*74a4d8c2SCharles.Forsyth permission.
14*74a4d8c2SCharles.Forsyth 
15*74a4d8c2SCharles.Forsyth LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16*74a4d8c2SCharles.Forsyth INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17*74a4d8c2SCharles.Forsyth IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18*74a4d8c2SCharles.Forsyth SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19*74a4d8c2SCharles.Forsyth WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20*74a4d8c2SCharles.Forsyth IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21*74a4d8c2SCharles.Forsyth ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22*74a4d8c2SCharles.Forsyth THIS SOFTWARE.
23*74a4d8c2SCharles.Forsyth ****************************************************************/
24*74a4d8c2SCharles.Forsyth 
25*74a4d8c2SCharles.Forsyth /* lasciate ogne speranza, voi ch'entrate. */
26*74a4d8c2SCharles.Forsyth 
27*74a4d8c2SCharles.Forsyth #define	DEBUG
28*74a4d8c2SCharles.Forsyth 
29*74a4d8c2SCharles.Forsyth #include <ctype.h>
30*74a4d8c2SCharles.Forsyth #include <stdio.h>
31*74a4d8c2SCharles.Forsyth #include <string.h>
32*74a4d8c2SCharles.Forsyth #include <stdlib.h>
33*74a4d8c2SCharles.Forsyth #include "awk.h"
34*74a4d8c2SCharles.Forsyth #include "ytab.h"
35*74a4d8c2SCharles.Forsyth 
36*74a4d8c2SCharles.Forsyth #define	HAT	(NCHARS-2)	/* matches ^ in regular expr */
37*74a4d8c2SCharles.Forsyth 				/* NCHARS is 2**n */
38*74a4d8c2SCharles.Forsyth #define MAXLIN 22
39*74a4d8c2SCharles.Forsyth 
40*74a4d8c2SCharles.Forsyth #define type(v)		(v)->nobj	/* badly overloaded here */
41*74a4d8c2SCharles.Forsyth #define info(v)		(v)->ntype	/* badly overloaded here */
42*74a4d8c2SCharles.Forsyth #define left(v)		(v)->narg[0]
43*74a4d8c2SCharles.Forsyth #define right(v)	(v)->narg[1]
44*74a4d8c2SCharles.Forsyth #define parent(v)	(v)->nnext
45*74a4d8c2SCharles.Forsyth 
46*74a4d8c2SCharles.Forsyth #define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47*74a4d8c2SCharles.Forsyth #define UNARY	case STAR: case PLUS: case QUEST:
48*74a4d8c2SCharles.Forsyth 
49*74a4d8c2SCharles.Forsyth /* encoding in tree Nodes:
50*74a4d8c2SCharles.Forsyth 	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51*74a4d8c2SCharles.Forsyth 		left is index, right contains value or pointer to value
52*74a4d8c2SCharles.Forsyth 	unary (STAR, PLUS, QUEST): left is child, right is null
53*74a4d8c2SCharles.Forsyth 	binary (CAT, OR): left and right are children
54*74a4d8c2SCharles.Forsyth 	parent contains pointer to parent
55*74a4d8c2SCharles.Forsyth */
56*74a4d8c2SCharles.Forsyth 
57*74a4d8c2SCharles.Forsyth 
58*74a4d8c2SCharles.Forsyth int	*setvec;
59*74a4d8c2SCharles.Forsyth int	*tmpset;
60*74a4d8c2SCharles.Forsyth int	maxsetvec = 0;
61*74a4d8c2SCharles.Forsyth 
62*74a4d8c2SCharles.Forsyth int	rtok;		/* next token in current re */
63*74a4d8c2SCharles.Forsyth int	rlxval;
64*74a4d8c2SCharles.Forsyth static uschar	*rlxstr;
65*74a4d8c2SCharles.Forsyth static uschar	*prestr;	/* current position in current re */
66*74a4d8c2SCharles.Forsyth static uschar	*lastre;	/* origin of last re */
67*74a4d8c2SCharles.Forsyth 
68*74a4d8c2SCharles.Forsyth static	int setcnt;
69*74a4d8c2SCharles.Forsyth static	int poscnt;
70*74a4d8c2SCharles.Forsyth 
71*74a4d8c2SCharles.Forsyth char	*patbeg;
72*74a4d8c2SCharles.Forsyth int	patlen;
73*74a4d8c2SCharles.Forsyth 
74*74a4d8c2SCharles.Forsyth #define	NFA	20	/* cache this many dynamic fa's */
75*74a4d8c2SCharles.Forsyth fa	*fatab[NFA];
76*74a4d8c2SCharles.Forsyth int	nfatab	= 0;	/* entries in fatab */
77*74a4d8c2SCharles.Forsyth 
makedfa(char * s,int anchor)78*74a4d8c2SCharles.Forsyth fa *makedfa(char *s, int anchor)	/* returns dfa for reg expr s */
79*74a4d8c2SCharles.Forsyth {
80*74a4d8c2SCharles.Forsyth 	int i, use, nuse;
81*74a4d8c2SCharles.Forsyth 	fa *pfa;
82*74a4d8c2SCharles.Forsyth 	static int now = 1;
83*74a4d8c2SCharles.Forsyth 
84*74a4d8c2SCharles.Forsyth 	if (setvec == 0) {	/* first time through any RE */
85*74a4d8c2SCharles.Forsyth 		maxsetvec = MAXLIN;
86*74a4d8c2SCharles.Forsyth 		setvec = (int *) malloc(maxsetvec * sizeof(int));
87*74a4d8c2SCharles.Forsyth 		tmpset = (int *) malloc(maxsetvec * sizeof(int));
88*74a4d8c2SCharles.Forsyth 		if (setvec == 0 || tmpset == 0)
89*74a4d8c2SCharles.Forsyth 			overflo("out of space initializing makedfa");
90*74a4d8c2SCharles.Forsyth 	}
91*74a4d8c2SCharles.Forsyth 
92*74a4d8c2SCharles.Forsyth 	if (compile_time)	/* a constant for sure */
93*74a4d8c2SCharles.Forsyth 		return mkdfa(s, anchor);
94*74a4d8c2SCharles.Forsyth 	for (i = 0; i < nfatab; i++)	/* is it there already? */
95*74a4d8c2SCharles.Forsyth 		if (fatab[i]->anchor == anchor
96*74a4d8c2SCharles.Forsyth 		  && strcmp(fatab[i]->restr, s) == 0) {
97*74a4d8c2SCharles.Forsyth 			fatab[i]->use = now++;
98*74a4d8c2SCharles.Forsyth 			return fatab[i];
99*74a4d8c2SCharles.Forsyth 		}
100*74a4d8c2SCharles.Forsyth 	pfa = mkdfa(s, anchor);
101*74a4d8c2SCharles.Forsyth 	if (nfatab < NFA) {	/* room for another */
102*74a4d8c2SCharles.Forsyth 		fatab[nfatab] = pfa;
103*74a4d8c2SCharles.Forsyth 		fatab[nfatab]->use = now++;
104*74a4d8c2SCharles.Forsyth 		nfatab++;
105*74a4d8c2SCharles.Forsyth 		return pfa;
106*74a4d8c2SCharles.Forsyth 	}
107*74a4d8c2SCharles.Forsyth 	use = fatab[0]->use;	/* replace least-recently used */
108*74a4d8c2SCharles.Forsyth 	nuse = 0;
109*74a4d8c2SCharles.Forsyth 	for (i = 1; i < nfatab; i++)
110*74a4d8c2SCharles.Forsyth 		if (fatab[i]->use < use) {
111*74a4d8c2SCharles.Forsyth 			use = fatab[i]->use;
112*74a4d8c2SCharles.Forsyth 			nuse = i;
113*74a4d8c2SCharles.Forsyth 		}
114*74a4d8c2SCharles.Forsyth 	freefa(fatab[nuse]);
115*74a4d8c2SCharles.Forsyth 	fatab[nuse] = pfa;
116*74a4d8c2SCharles.Forsyth 	pfa->use = now++;
117*74a4d8c2SCharles.Forsyth 	return pfa;
118*74a4d8c2SCharles.Forsyth }
119*74a4d8c2SCharles.Forsyth 
mkdfa(char * s,int anchor)120*74a4d8c2SCharles.Forsyth fa *mkdfa(char *s, int anchor)	/* does the real work of making a dfa */
121*74a4d8c2SCharles.Forsyth 				/* anchor = 1 for anchored matches, else 0 */
122*74a4d8c2SCharles.Forsyth {
123*74a4d8c2SCharles.Forsyth 	Node *p, *p1;
124*74a4d8c2SCharles.Forsyth 	fa *f;
125*74a4d8c2SCharles.Forsyth 
126*74a4d8c2SCharles.Forsyth 	p = reparse(s);
127*74a4d8c2SCharles.Forsyth 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
128*74a4d8c2SCharles.Forsyth 		/* put ALL STAR in front of reg.  exp. */
129*74a4d8c2SCharles.Forsyth 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
130*74a4d8c2SCharles.Forsyth 		/* put FINAL after reg.  exp. */
131*74a4d8c2SCharles.Forsyth 
132*74a4d8c2SCharles.Forsyth 	poscnt = 0;
133*74a4d8c2SCharles.Forsyth 	penter(p1);	/* enter parent pointers and leaf indices */
134*74a4d8c2SCharles.Forsyth 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
135*74a4d8c2SCharles.Forsyth 		overflo("out of space for fa");
136*74a4d8c2SCharles.Forsyth 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
137*74a4d8c2SCharles.Forsyth 	cfoll(f, p1);	/* set up follow sets */
138*74a4d8c2SCharles.Forsyth 	freetr(p1);
139*74a4d8c2SCharles.Forsyth 	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
140*74a4d8c2SCharles.Forsyth 			overflo("out of space in makedfa");
141*74a4d8c2SCharles.Forsyth 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
142*74a4d8c2SCharles.Forsyth 		overflo("out of space in makedfa");
143*74a4d8c2SCharles.Forsyth 	*f->posns[1] = 0;
144*74a4d8c2SCharles.Forsyth 	f->initstat = makeinit(f, anchor);
145*74a4d8c2SCharles.Forsyth 	f->anchor = anchor;
146*74a4d8c2SCharles.Forsyth 	f->restr = (uschar *) tostring(s);
147*74a4d8c2SCharles.Forsyth 	return f;
148*74a4d8c2SCharles.Forsyth }
149*74a4d8c2SCharles.Forsyth 
makeinit(fa * f,int anchor)150*74a4d8c2SCharles.Forsyth int makeinit(fa *f, int anchor)
151*74a4d8c2SCharles.Forsyth {
152*74a4d8c2SCharles.Forsyth 	int i, k;
153*74a4d8c2SCharles.Forsyth 
154*74a4d8c2SCharles.Forsyth 	f->curstat = 2;
155*74a4d8c2SCharles.Forsyth 	f->out[2] = 0;
156*74a4d8c2SCharles.Forsyth 	f->reset = 0;
157*74a4d8c2SCharles.Forsyth 	k = *(f->re[0].lfollow);
158*74a4d8c2SCharles.Forsyth 	xfree(f->posns[2]);
159*74a4d8c2SCharles.Forsyth 	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
160*74a4d8c2SCharles.Forsyth 		overflo("out of space in makeinit");
161*74a4d8c2SCharles.Forsyth 	for (i=0; i <= k; i++) {
162*74a4d8c2SCharles.Forsyth 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
163*74a4d8c2SCharles.Forsyth 	}
164*74a4d8c2SCharles.Forsyth 	if ((f->posns[2])[1] == f->accept)
165*74a4d8c2SCharles.Forsyth 		f->out[2] = 1;
166*74a4d8c2SCharles.Forsyth 	for (i=0; i < NCHARS; i++)
167*74a4d8c2SCharles.Forsyth 		f->gototab[2][i] = 0;
168*74a4d8c2SCharles.Forsyth 	f->curstat = cgoto(f, 2, HAT);
169*74a4d8c2SCharles.Forsyth 	if (anchor) {
170*74a4d8c2SCharles.Forsyth 		*f->posns[2] = k-1;	/* leave out position 0 */
171*74a4d8c2SCharles.Forsyth 		for (i=0; i < k; i++) {
172*74a4d8c2SCharles.Forsyth 			(f->posns[0])[i] = (f->posns[2])[i];
173*74a4d8c2SCharles.Forsyth 		}
174*74a4d8c2SCharles.Forsyth 
175*74a4d8c2SCharles.Forsyth 		f->out[0] = f->out[2];
176*74a4d8c2SCharles.Forsyth 		if (f->curstat != 2)
177*74a4d8c2SCharles.Forsyth 			--(*f->posns[f->curstat]);
178*74a4d8c2SCharles.Forsyth 	}
179*74a4d8c2SCharles.Forsyth 	return f->curstat;
180*74a4d8c2SCharles.Forsyth }
181*74a4d8c2SCharles.Forsyth 
penter(Node * p)182*74a4d8c2SCharles.Forsyth void penter(Node *p)	/* set up parent pointers and leaf indices */
183*74a4d8c2SCharles.Forsyth {
184*74a4d8c2SCharles.Forsyth 	switch (type(p)) {
185*74a4d8c2SCharles.Forsyth 	LEAF
186*74a4d8c2SCharles.Forsyth 		info(p) = poscnt;
187*74a4d8c2SCharles.Forsyth 		poscnt++;
188*74a4d8c2SCharles.Forsyth 		break;
189*74a4d8c2SCharles.Forsyth 	UNARY
190*74a4d8c2SCharles.Forsyth 		penter(left(p));
191*74a4d8c2SCharles.Forsyth 		parent(left(p)) = p;
192*74a4d8c2SCharles.Forsyth 		break;
193*74a4d8c2SCharles.Forsyth 	case CAT:
194*74a4d8c2SCharles.Forsyth 	case OR:
195*74a4d8c2SCharles.Forsyth 		penter(left(p));
196*74a4d8c2SCharles.Forsyth 		penter(right(p));
197*74a4d8c2SCharles.Forsyth 		parent(left(p)) = p;
198*74a4d8c2SCharles.Forsyth 		parent(right(p)) = p;
199*74a4d8c2SCharles.Forsyth 		break;
200*74a4d8c2SCharles.Forsyth 	default:	/* can't happen */
201*74a4d8c2SCharles.Forsyth 		FATAL("can't happen: unknown type %d in penter", type(p));
202*74a4d8c2SCharles.Forsyth 		break;
203*74a4d8c2SCharles.Forsyth 	}
204*74a4d8c2SCharles.Forsyth }
205*74a4d8c2SCharles.Forsyth 
freetr(Node * p)206*74a4d8c2SCharles.Forsyth void freetr(Node *p)	/* free parse tree */
207*74a4d8c2SCharles.Forsyth {
208*74a4d8c2SCharles.Forsyth 	switch (type(p)) {
209*74a4d8c2SCharles.Forsyth 	LEAF
210*74a4d8c2SCharles.Forsyth 		xfree(p);
211*74a4d8c2SCharles.Forsyth 		break;
212*74a4d8c2SCharles.Forsyth 	UNARY
213*74a4d8c2SCharles.Forsyth 		freetr(left(p));
214*74a4d8c2SCharles.Forsyth 		xfree(p);
215*74a4d8c2SCharles.Forsyth 		break;
216*74a4d8c2SCharles.Forsyth 	case CAT:
217*74a4d8c2SCharles.Forsyth 	case OR:
218*74a4d8c2SCharles.Forsyth 		freetr(left(p));
219*74a4d8c2SCharles.Forsyth 		freetr(right(p));
220*74a4d8c2SCharles.Forsyth 		xfree(p);
221*74a4d8c2SCharles.Forsyth 		break;
222*74a4d8c2SCharles.Forsyth 	default:	/* can't happen */
223*74a4d8c2SCharles.Forsyth 		FATAL("can't happen: unknown type %d in freetr", type(p));
224*74a4d8c2SCharles.Forsyth 		break;
225*74a4d8c2SCharles.Forsyth 	}
226*74a4d8c2SCharles.Forsyth }
227*74a4d8c2SCharles.Forsyth 
228*74a4d8c2SCharles.Forsyth /* in the parsing of regular expressions, metacharacters like . have */
229*74a4d8c2SCharles.Forsyth /* to be seen literally;  \056 is not a metacharacter. */
230*74a4d8c2SCharles.Forsyth 
hexstr(char ** pp)231*74a4d8c2SCharles.Forsyth int hexstr(char **pp)	/* find and eval hex string at pp, return new p */
232*74a4d8c2SCharles.Forsyth {			/* only pick up one 8-bit byte (2 chars) */
233*74a4d8c2SCharles.Forsyth 	uschar *p;
234*74a4d8c2SCharles.Forsyth 	int n = 0;
235*74a4d8c2SCharles.Forsyth 	int i;
236*74a4d8c2SCharles.Forsyth 
237*74a4d8c2SCharles.Forsyth 	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
238*74a4d8c2SCharles.Forsyth 		if (isdigit(*p))
239*74a4d8c2SCharles.Forsyth 			n = 16 * n + *p - '0';
240*74a4d8c2SCharles.Forsyth 		else if (*p >= 'a' && *p <= 'f')
241*74a4d8c2SCharles.Forsyth 			n = 16 * n + *p - 'a' + 10;
242*74a4d8c2SCharles.Forsyth 		else if (*p >= 'A' && *p <= 'F')
243*74a4d8c2SCharles.Forsyth 			n = 16 * n + *p - 'A' + 10;
244*74a4d8c2SCharles.Forsyth 	}
245*74a4d8c2SCharles.Forsyth 	*pp = (char *) p;
246*74a4d8c2SCharles.Forsyth 	return n;
247*74a4d8c2SCharles.Forsyth }
248*74a4d8c2SCharles.Forsyth 
249*74a4d8c2SCharles.Forsyth #define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
250*74a4d8c2SCharles.Forsyth 
quoted(char ** pp)251*74a4d8c2SCharles.Forsyth int quoted(char **pp)	/* pick up next thing after a \\ */
252*74a4d8c2SCharles.Forsyth 			/* and increment *pp */
253*74a4d8c2SCharles.Forsyth {
254*74a4d8c2SCharles.Forsyth 	char *p = *pp;
255*74a4d8c2SCharles.Forsyth 	int c;
256*74a4d8c2SCharles.Forsyth 
257*74a4d8c2SCharles.Forsyth 	if ((c = *p++) == 't')
258*74a4d8c2SCharles.Forsyth 		c = '\t';
259*74a4d8c2SCharles.Forsyth 	else if (c == 'n')
260*74a4d8c2SCharles.Forsyth 		c = '\n';
261*74a4d8c2SCharles.Forsyth 	else if (c == 'f')
262*74a4d8c2SCharles.Forsyth 		c = '\f';
263*74a4d8c2SCharles.Forsyth 	else if (c == 'r')
264*74a4d8c2SCharles.Forsyth 		c = '\r';
265*74a4d8c2SCharles.Forsyth 	else if (c == 'b')
266*74a4d8c2SCharles.Forsyth 		c = '\b';
267*74a4d8c2SCharles.Forsyth 	else if (c == '\\')
268*74a4d8c2SCharles.Forsyth 		c = '\\';
269*74a4d8c2SCharles.Forsyth 	else if (c == 'x') {	/* hexadecimal goo follows */
270*74a4d8c2SCharles.Forsyth 		c = hexstr(&p);	/* this adds a null if number is invalid */
271*74a4d8c2SCharles.Forsyth 	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
272*74a4d8c2SCharles.Forsyth 		int n = c - '0';
273*74a4d8c2SCharles.Forsyth 		if (isoctdigit(*p)) {
274*74a4d8c2SCharles.Forsyth 			n = 8 * n + *p++ - '0';
275*74a4d8c2SCharles.Forsyth 			if (isoctdigit(*p))
276*74a4d8c2SCharles.Forsyth 				n = 8 * n + *p++ - '0';
277*74a4d8c2SCharles.Forsyth 		}
278*74a4d8c2SCharles.Forsyth 		c = n;
279*74a4d8c2SCharles.Forsyth 	} /* else */
280*74a4d8c2SCharles.Forsyth 		/* c = c; */
281*74a4d8c2SCharles.Forsyth 	*pp = p;
282*74a4d8c2SCharles.Forsyth 	return c;
283*74a4d8c2SCharles.Forsyth }
284*74a4d8c2SCharles.Forsyth 
cclenter(char * argp)285*74a4d8c2SCharles.Forsyth char *cclenter(char *argp)	/* add a character class */
286*74a4d8c2SCharles.Forsyth {
287*74a4d8c2SCharles.Forsyth 	int i, c, c2;
288*74a4d8c2SCharles.Forsyth 	uschar *p = (uschar *) argp;
289*74a4d8c2SCharles.Forsyth 	uschar *op, *bp;
290*74a4d8c2SCharles.Forsyth 	static uschar *buf = 0;
291*74a4d8c2SCharles.Forsyth 	static int bufsz = 100;
292*74a4d8c2SCharles.Forsyth 
293*74a4d8c2SCharles.Forsyth 	op = p;
294*74a4d8c2SCharles.Forsyth 	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
295*74a4d8c2SCharles.Forsyth 		FATAL("out of space for character class [%.10s...] 1", p);
296*74a4d8c2SCharles.Forsyth 	bp = buf;
297*74a4d8c2SCharles.Forsyth 	for (i = 0; (c = *p++) != 0; ) {
298*74a4d8c2SCharles.Forsyth 		if (c == '\\') {
299*74a4d8c2SCharles.Forsyth 			c = quoted((char **) &p);
300*74a4d8c2SCharles.Forsyth 		} else if (c == '-' && i > 0 && bp[-1] != 0) {
301*74a4d8c2SCharles.Forsyth 			if (*p != 0) {
302*74a4d8c2SCharles.Forsyth 				c = bp[-1];
303*74a4d8c2SCharles.Forsyth 				c2 = *p++;
304*74a4d8c2SCharles.Forsyth 				if (c2 == '\\')
305*74a4d8c2SCharles.Forsyth 					c2 = quoted((char **) &p);
306*74a4d8c2SCharles.Forsyth 				if (c > c2) {	/* empty; ignore */
307*74a4d8c2SCharles.Forsyth 					bp--;
308*74a4d8c2SCharles.Forsyth 					i--;
309*74a4d8c2SCharles.Forsyth 					continue;
310*74a4d8c2SCharles.Forsyth 				}
311*74a4d8c2SCharles.Forsyth 				while (c < c2) {
312*74a4d8c2SCharles.Forsyth 					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
313*74a4d8c2SCharles.Forsyth 						FATAL("out of space for character class [%.10s...] 2", p);
314*74a4d8c2SCharles.Forsyth 					*bp++ = ++c;
315*74a4d8c2SCharles.Forsyth 					i++;
316*74a4d8c2SCharles.Forsyth 				}
317*74a4d8c2SCharles.Forsyth 				continue;
318*74a4d8c2SCharles.Forsyth 			}
319*74a4d8c2SCharles.Forsyth 		}
320*74a4d8c2SCharles.Forsyth 		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
321*74a4d8c2SCharles.Forsyth 			FATAL("out of space for character class [%.10s...] 3", p);
322*74a4d8c2SCharles.Forsyth 		*bp++ = c;
323*74a4d8c2SCharles.Forsyth 		i++;
324*74a4d8c2SCharles.Forsyth 	}
325*74a4d8c2SCharles.Forsyth 	*bp = 0;
326*74a4d8c2SCharles.Forsyth 	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
327*74a4d8c2SCharles.Forsyth 	xfree(op);
328*74a4d8c2SCharles.Forsyth 	return (char *) tostring((char *) buf);
329*74a4d8c2SCharles.Forsyth }
330*74a4d8c2SCharles.Forsyth 
overflo(char * s)331*74a4d8c2SCharles.Forsyth void overflo(char *s)
332*74a4d8c2SCharles.Forsyth {
333*74a4d8c2SCharles.Forsyth 	FATAL("regular expression too big: %.30s...", s);
334*74a4d8c2SCharles.Forsyth }
335*74a4d8c2SCharles.Forsyth 
cfoll(fa * f,Node * v)336*74a4d8c2SCharles.Forsyth void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
337*74a4d8c2SCharles.Forsyth {
338*74a4d8c2SCharles.Forsyth 	int i;
339*74a4d8c2SCharles.Forsyth 	int *p;
340*74a4d8c2SCharles.Forsyth 
341*74a4d8c2SCharles.Forsyth 	switch (type(v)) {
342*74a4d8c2SCharles.Forsyth 	LEAF
343*74a4d8c2SCharles.Forsyth 		f->re[info(v)].ltype = type(v);
344*74a4d8c2SCharles.Forsyth 		f->re[info(v)].lval.np = right(v);
345*74a4d8c2SCharles.Forsyth 		while (f->accept >= maxsetvec) {	/* guessing here! */
346*74a4d8c2SCharles.Forsyth 			maxsetvec *= 4;
347*74a4d8c2SCharles.Forsyth 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
348*74a4d8c2SCharles.Forsyth 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
349*74a4d8c2SCharles.Forsyth 			if (setvec == 0 || tmpset == 0)
350*74a4d8c2SCharles.Forsyth 				overflo("out of space in cfoll()");
351*74a4d8c2SCharles.Forsyth 		}
352*74a4d8c2SCharles.Forsyth 		for (i = 0; i <= f->accept; i++)
353*74a4d8c2SCharles.Forsyth 			setvec[i] = 0;
354*74a4d8c2SCharles.Forsyth 		setcnt = 0;
355*74a4d8c2SCharles.Forsyth 		follow(v);	/* computes setvec and setcnt */
356*74a4d8c2SCharles.Forsyth 		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
357*74a4d8c2SCharles.Forsyth 			overflo("out of space building follow set");
358*74a4d8c2SCharles.Forsyth 		f->re[info(v)].lfollow = p;
359*74a4d8c2SCharles.Forsyth 		*p = setcnt;
360*74a4d8c2SCharles.Forsyth 		for (i = f->accept; i >= 0; i--)
361*74a4d8c2SCharles.Forsyth 			if (setvec[i] == 1)
362*74a4d8c2SCharles.Forsyth 				*++p = i;
363*74a4d8c2SCharles.Forsyth 		break;
364*74a4d8c2SCharles.Forsyth 	UNARY
365*74a4d8c2SCharles.Forsyth 		cfoll(f,left(v));
366*74a4d8c2SCharles.Forsyth 		break;
367*74a4d8c2SCharles.Forsyth 	case CAT:
368*74a4d8c2SCharles.Forsyth 	case OR:
369*74a4d8c2SCharles.Forsyth 		cfoll(f,left(v));
370*74a4d8c2SCharles.Forsyth 		cfoll(f,right(v));
371*74a4d8c2SCharles.Forsyth 		break;
372*74a4d8c2SCharles.Forsyth 	default:	/* can't happen */
373*74a4d8c2SCharles.Forsyth 		FATAL("can't happen: unknown type %d in cfoll", type(v));
374*74a4d8c2SCharles.Forsyth 	}
375*74a4d8c2SCharles.Forsyth }
376*74a4d8c2SCharles.Forsyth 
first(Node * p)377*74a4d8c2SCharles.Forsyth int first(Node *p)	/* collects initially active leaves of p into setvec */
378*74a4d8c2SCharles.Forsyth 			/* returns 1 if p matches empty string */
379*74a4d8c2SCharles.Forsyth {
380*74a4d8c2SCharles.Forsyth 	int b, lp;
381*74a4d8c2SCharles.Forsyth 
382*74a4d8c2SCharles.Forsyth 	switch (type(p)) {
383*74a4d8c2SCharles.Forsyth 	LEAF
384*74a4d8c2SCharles.Forsyth 		lp = info(p);	/* look for high-water mark of subscripts */
385*74a4d8c2SCharles.Forsyth 		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
386*74a4d8c2SCharles.Forsyth 			maxsetvec *= 4;
387*74a4d8c2SCharles.Forsyth 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
388*74a4d8c2SCharles.Forsyth 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
389*74a4d8c2SCharles.Forsyth 			if (setvec == 0 || tmpset == 0)
390*74a4d8c2SCharles.Forsyth 				overflo("out of space in first()");
391*74a4d8c2SCharles.Forsyth 		}
392*74a4d8c2SCharles.Forsyth 		if (setvec[lp] != 1) {
393*74a4d8c2SCharles.Forsyth 			setvec[lp] = 1;
394*74a4d8c2SCharles.Forsyth 			setcnt++;
395*74a4d8c2SCharles.Forsyth 		}
396*74a4d8c2SCharles.Forsyth 		if (type(p) == CCL && (*(char *) right(p)) == '\0')
397*74a4d8c2SCharles.Forsyth 			return(0);		/* empty CCL */
398*74a4d8c2SCharles.Forsyth 		else return(1);
399*74a4d8c2SCharles.Forsyth 	case PLUS:
400*74a4d8c2SCharles.Forsyth 		if (first(left(p)) == 0) return(0);
401*74a4d8c2SCharles.Forsyth 		return(1);
402*74a4d8c2SCharles.Forsyth 	case STAR:
403*74a4d8c2SCharles.Forsyth 	case QUEST:
404*74a4d8c2SCharles.Forsyth 		first(left(p));
405*74a4d8c2SCharles.Forsyth 		return(0);
406*74a4d8c2SCharles.Forsyth 	case CAT:
407*74a4d8c2SCharles.Forsyth 		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
408*74a4d8c2SCharles.Forsyth 		return(1);
409*74a4d8c2SCharles.Forsyth 	case OR:
410*74a4d8c2SCharles.Forsyth 		b = first(right(p));
411*74a4d8c2SCharles.Forsyth 		if (first(left(p)) == 0 || b == 0) return(0);
412*74a4d8c2SCharles.Forsyth 		return(1);
413*74a4d8c2SCharles.Forsyth 	}
414*74a4d8c2SCharles.Forsyth 	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
415*74a4d8c2SCharles.Forsyth 	return(-1);
416*74a4d8c2SCharles.Forsyth }
417*74a4d8c2SCharles.Forsyth 
follow(Node * v)418*74a4d8c2SCharles.Forsyth void follow(Node *v)	/* collects leaves that can follow v into setvec */
419*74a4d8c2SCharles.Forsyth {
420*74a4d8c2SCharles.Forsyth 	Node *p;
421*74a4d8c2SCharles.Forsyth 
422*74a4d8c2SCharles.Forsyth 	if (type(v) == FINAL)
423*74a4d8c2SCharles.Forsyth 		return;
424*74a4d8c2SCharles.Forsyth 	p = parent(v);
425*74a4d8c2SCharles.Forsyth 	switch (type(p)) {
426*74a4d8c2SCharles.Forsyth 	case STAR:
427*74a4d8c2SCharles.Forsyth 	case PLUS:
428*74a4d8c2SCharles.Forsyth 		first(v);
429*74a4d8c2SCharles.Forsyth 		follow(p);
430*74a4d8c2SCharles.Forsyth 		return;
431*74a4d8c2SCharles.Forsyth 
432*74a4d8c2SCharles.Forsyth 	case OR:
433*74a4d8c2SCharles.Forsyth 	case QUEST:
434*74a4d8c2SCharles.Forsyth 		follow(p);
435*74a4d8c2SCharles.Forsyth 		return;
436*74a4d8c2SCharles.Forsyth 
437*74a4d8c2SCharles.Forsyth 	case CAT:
438*74a4d8c2SCharles.Forsyth 		if (v == left(p)) {	/* v is left child of p */
439*74a4d8c2SCharles.Forsyth 			if (first(right(p)) == 0) {
440*74a4d8c2SCharles.Forsyth 				follow(p);
441*74a4d8c2SCharles.Forsyth 				return;
442*74a4d8c2SCharles.Forsyth 			}
443*74a4d8c2SCharles.Forsyth 		} else		/* v is right child */
444*74a4d8c2SCharles.Forsyth 			follow(p);
445*74a4d8c2SCharles.Forsyth 		return;
446*74a4d8c2SCharles.Forsyth 	}
447*74a4d8c2SCharles.Forsyth }
448*74a4d8c2SCharles.Forsyth 
member(int c,char * sarg)449*74a4d8c2SCharles.Forsyth int member(int c, char *sarg)	/* is c in s? */
450*74a4d8c2SCharles.Forsyth {
451*74a4d8c2SCharles.Forsyth 	uschar *s = (uschar *) sarg;
452*74a4d8c2SCharles.Forsyth 
453*74a4d8c2SCharles.Forsyth 	while (*s)
454*74a4d8c2SCharles.Forsyth 		if (c == *s++)
455*74a4d8c2SCharles.Forsyth 			return(1);
456*74a4d8c2SCharles.Forsyth 	return(0);
457*74a4d8c2SCharles.Forsyth }
458*74a4d8c2SCharles.Forsyth 
match(fa * f,char * p0)459*74a4d8c2SCharles.Forsyth int match(fa *f, char *p0)	/* shortest match ? */
460*74a4d8c2SCharles.Forsyth {
461*74a4d8c2SCharles.Forsyth 	int s, ns;
462*74a4d8c2SCharles.Forsyth 	uschar *p = (uschar *) p0;
463*74a4d8c2SCharles.Forsyth 
464*74a4d8c2SCharles.Forsyth 	s = f->reset ? makeinit(f,0) : f->initstat;
465*74a4d8c2SCharles.Forsyth 	if (f->out[s])
466*74a4d8c2SCharles.Forsyth 		return(1);
467*74a4d8c2SCharles.Forsyth 	do {
468*74a4d8c2SCharles.Forsyth 		if ((ns = f->gototab[s][*p]) != 0)
469*74a4d8c2SCharles.Forsyth 			s = ns;
470*74a4d8c2SCharles.Forsyth 		else
471*74a4d8c2SCharles.Forsyth 			s = cgoto(f, s, *p);
472*74a4d8c2SCharles.Forsyth 		if (f->out[s])
473*74a4d8c2SCharles.Forsyth 			return(1);
474*74a4d8c2SCharles.Forsyth 	} while (*p++ != 0);
475*74a4d8c2SCharles.Forsyth 	return(0);
476*74a4d8c2SCharles.Forsyth }
477*74a4d8c2SCharles.Forsyth 
pmatch(fa * f,char * p0)478*74a4d8c2SCharles.Forsyth int pmatch(fa *f, char *p0)	/* longest match, for sub */
479*74a4d8c2SCharles.Forsyth {
480*74a4d8c2SCharles.Forsyth 	int s, ns;
481*74a4d8c2SCharles.Forsyth 	uschar *p = (uschar *) p0;
482*74a4d8c2SCharles.Forsyth 	uschar *q;
483*74a4d8c2SCharles.Forsyth 	int i, k;
484*74a4d8c2SCharles.Forsyth 
485*74a4d8c2SCharles.Forsyth 	s = f->reset ? makeinit(f,1) : f->initstat;
486*74a4d8c2SCharles.Forsyth 	patbeg = (char *) p;
487*74a4d8c2SCharles.Forsyth 	patlen = -1;
488*74a4d8c2SCharles.Forsyth 	do {
489*74a4d8c2SCharles.Forsyth 		q = p;
490*74a4d8c2SCharles.Forsyth 		do {
491*74a4d8c2SCharles.Forsyth 			if (f->out[s])		/* final state */
492*74a4d8c2SCharles.Forsyth 				patlen = q-p;
493*74a4d8c2SCharles.Forsyth 			if ((ns = f->gototab[s][*q]) != 0)
494*74a4d8c2SCharles.Forsyth 				s = ns;
495*74a4d8c2SCharles.Forsyth 			else
496*74a4d8c2SCharles.Forsyth 				s = cgoto(f, s, *q);
497*74a4d8c2SCharles.Forsyth 			if (s == 1) {	/* no transition */
498*74a4d8c2SCharles.Forsyth 				if (patlen >= 0) {
499*74a4d8c2SCharles.Forsyth 					patbeg = (char *) p;
500*74a4d8c2SCharles.Forsyth 					return(1);
501*74a4d8c2SCharles.Forsyth 				}
502*74a4d8c2SCharles.Forsyth 				else
503*74a4d8c2SCharles.Forsyth 					goto nextin;	/* no match */
504*74a4d8c2SCharles.Forsyth 			}
505*74a4d8c2SCharles.Forsyth 		} while (*q++ != 0);
506*74a4d8c2SCharles.Forsyth 		if (f->out[s])
507*74a4d8c2SCharles.Forsyth 			patlen = q-p-1;	/* don't count $ */
508*74a4d8c2SCharles.Forsyth 		if (patlen >= 0) {
509*74a4d8c2SCharles.Forsyth 			patbeg = (char *) p;
510*74a4d8c2SCharles.Forsyth 			return(1);
511*74a4d8c2SCharles.Forsyth 		}
512*74a4d8c2SCharles.Forsyth 	nextin:
513*74a4d8c2SCharles.Forsyth 		s = 2;
514*74a4d8c2SCharles.Forsyth 		if (f->reset) {
515*74a4d8c2SCharles.Forsyth 			for (i = 2; i <= f->curstat; i++)
516*74a4d8c2SCharles.Forsyth 				xfree(f->posns[i]);
517*74a4d8c2SCharles.Forsyth 			k = *f->posns[0];
518*74a4d8c2SCharles.Forsyth 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
519*74a4d8c2SCharles.Forsyth 				overflo("out of space in pmatch");
520*74a4d8c2SCharles.Forsyth 			for (i = 0; i <= k; i++)
521*74a4d8c2SCharles.Forsyth 				(f->posns[2])[i] = (f->posns[0])[i];
522*74a4d8c2SCharles.Forsyth 			f->initstat = f->curstat = 2;
523*74a4d8c2SCharles.Forsyth 			f->out[2] = f->out[0];
524*74a4d8c2SCharles.Forsyth 			for (i = 0; i < NCHARS; i++)
525*74a4d8c2SCharles.Forsyth 				f->gototab[2][i] = 0;
526*74a4d8c2SCharles.Forsyth 		}
527*74a4d8c2SCharles.Forsyth 	} while (*p++ != 0);
528*74a4d8c2SCharles.Forsyth 	return (0);
529*74a4d8c2SCharles.Forsyth }
530*74a4d8c2SCharles.Forsyth 
nematch(fa * f,char * p0)531*74a4d8c2SCharles.Forsyth int nematch(fa *f, char *p0)	/* non-empty match, for sub */
532*74a4d8c2SCharles.Forsyth {
533*74a4d8c2SCharles.Forsyth 	int s, ns;
534*74a4d8c2SCharles.Forsyth 	uschar *p = (uschar *) p0;
535*74a4d8c2SCharles.Forsyth 	uschar *q;
536*74a4d8c2SCharles.Forsyth 	int i, k;
537*74a4d8c2SCharles.Forsyth 
538*74a4d8c2SCharles.Forsyth 	s = f->reset ? makeinit(f,1) : f->initstat;
539*74a4d8c2SCharles.Forsyth 	patlen = -1;
540*74a4d8c2SCharles.Forsyth 	while (*p) {
541*74a4d8c2SCharles.Forsyth 		q = p;
542*74a4d8c2SCharles.Forsyth 		do {
543*74a4d8c2SCharles.Forsyth 			if (f->out[s])		/* final state */
544*74a4d8c2SCharles.Forsyth 				patlen = q-p;
545*74a4d8c2SCharles.Forsyth 			if ((ns = f->gototab[s][*q]) != 0)
546*74a4d8c2SCharles.Forsyth 				s = ns;
547*74a4d8c2SCharles.Forsyth 			else
548*74a4d8c2SCharles.Forsyth 				s = cgoto(f, s, *q);
549*74a4d8c2SCharles.Forsyth 			if (s == 1) {	/* no transition */
550*74a4d8c2SCharles.Forsyth 				if (patlen > 0) {
551*74a4d8c2SCharles.Forsyth 					patbeg = (char *) p;
552*74a4d8c2SCharles.Forsyth 					return(1);
553*74a4d8c2SCharles.Forsyth 				} else
554*74a4d8c2SCharles.Forsyth 					goto nnextin;	/* no nonempty match */
555*74a4d8c2SCharles.Forsyth 			}
556*74a4d8c2SCharles.Forsyth 		} while (*q++ != 0);
557*74a4d8c2SCharles.Forsyth 		if (f->out[s])
558*74a4d8c2SCharles.Forsyth 			patlen = q-p-1;	/* don't count $ */
559*74a4d8c2SCharles.Forsyth 		if (patlen > 0 ) {
560*74a4d8c2SCharles.Forsyth 			patbeg = (char *) p;
561*74a4d8c2SCharles.Forsyth 			return(1);
562*74a4d8c2SCharles.Forsyth 		}
563*74a4d8c2SCharles.Forsyth 	nnextin:
564*74a4d8c2SCharles.Forsyth 		s = 2;
565*74a4d8c2SCharles.Forsyth 		if (f->reset) {
566*74a4d8c2SCharles.Forsyth 			for (i = 2; i <= f->curstat; i++)
567*74a4d8c2SCharles.Forsyth 				xfree(f->posns[i]);
568*74a4d8c2SCharles.Forsyth 			k = *f->posns[0];
569*74a4d8c2SCharles.Forsyth 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
570*74a4d8c2SCharles.Forsyth 				overflo("out of state space");
571*74a4d8c2SCharles.Forsyth 			for (i = 0; i <= k; i++)
572*74a4d8c2SCharles.Forsyth 				(f->posns[2])[i] = (f->posns[0])[i];
573*74a4d8c2SCharles.Forsyth 			f->initstat = f->curstat = 2;
574*74a4d8c2SCharles.Forsyth 			f->out[2] = f->out[0];
575*74a4d8c2SCharles.Forsyth 			for (i = 0; i < NCHARS; i++)
576*74a4d8c2SCharles.Forsyth 				f->gototab[2][i] = 0;
577*74a4d8c2SCharles.Forsyth 		}
578*74a4d8c2SCharles.Forsyth 		p++;
579*74a4d8c2SCharles.Forsyth 	}
580*74a4d8c2SCharles.Forsyth 	return (0);
581*74a4d8c2SCharles.Forsyth }
582*74a4d8c2SCharles.Forsyth 
reparse(char * p)583*74a4d8c2SCharles.Forsyth Node *reparse(char *p)	/* parses regular expression pointed to by p */
584*74a4d8c2SCharles.Forsyth {			/* uses relex() to scan regular expression */
585*74a4d8c2SCharles.Forsyth 	Node *np;
586*74a4d8c2SCharles.Forsyth 
587*74a4d8c2SCharles.Forsyth 	dprintf( ("reparse <%s>\n", p) );
588*74a4d8c2SCharles.Forsyth 	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
589*74a4d8c2SCharles.Forsyth 	rtok = relex();
590*74a4d8c2SCharles.Forsyth 	if (rtok == '\0')
591*74a4d8c2SCharles.Forsyth 		FATAL("empty regular expression");
592*74a4d8c2SCharles.Forsyth 	np = regexp();
593*74a4d8c2SCharles.Forsyth 	if (rtok != '\0')
594*74a4d8c2SCharles.Forsyth 		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
595*74a4d8c2SCharles.Forsyth 	return(np);
596*74a4d8c2SCharles.Forsyth }
597*74a4d8c2SCharles.Forsyth 
regexp(void)598*74a4d8c2SCharles.Forsyth Node *regexp(void)	/* top-level parse of reg expr */
599*74a4d8c2SCharles.Forsyth {
600*74a4d8c2SCharles.Forsyth 	return (alt(concat(primary())));
601*74a4d8c2SCharles.Forsyth }
602*74a4d8c2SCharles.Forsyth 
primary(void)603*74a4d8c2SCharles.Forsyth Node *primary(void)
604*74a4d8c2SCharles.Forsyth {
605*74a4d8c2SCharles.Forsyth 	Node *np;
606*74a4d8c2SCharles.Forsyth 
607*74a4d8c2SCharles.Forsyth 	switch (rtok) {
608*74a4d8c2SCharles.Forsyth 	case CHAR:
609*74a4d8c2SCharles.Forsyth 		np = op2(CHAR, NIL, itonp(rlxval));
610*74a4d8c2SCharles.Forsyth 		rtok = relex();
611*74a4d8c2SCharles.Forsyth 		return (unary(np));
612*74a4d8c2SCharles.Forsyth 	case ALL:
613*74a4d8c2SCharles.Forsyth 		rtok = relex();
614*74a4d8c2SCharles.Forsyth 		return (unary(op2(ALL, NIL, NIL)));
615*74a4d8c2SCharles.Forsyth 	case DOT:
616*74a4d8c2SCharles.Forsyth 		rtok = relex();
617*74a4d8c2SCharles.Forsyth 		return (unary(op2(DOT, NIL, NIL)));
618*74a4d8c2SCharles.Forsyth 	case CCL:
619*74a4d8c2SCharles.Forsyth 		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
620*74a4d8c2SCharles.Forsyth 		rtok = relex();
621*74a4d8c2SCharles.Forsyth 		return (unary(np));
622*74a4d8c2SCharles.Forsyth 	case NCCL:
623*74a4d8c2SCharles.Forsyth 		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
624*74a4d8c2SCharles.Forsyth 		rtok = relex();
625*74a4d8c2SCharles.Forsyth 		return (unary(np));
626*74a4d8c2SCharles.Forsyth 	case '^':
627*74a4d8c2SCharles.Forsyth 		rtok = relex();
628*74a4d8c2SCharles.Forsyth 		return (unary(op2(CHAR, NIL, itonp(HAT))));
629*74a4d8c2SCharles.Forsyth 	case '$':
630*74a4d8c2SCharles.Forsyth 		rtok = relex();
631*74a4d8c2SCharles.Forsyth 		return (unary(op2(CHAR, NIL, NIL)));
632*74a4d8c2SCharles.Forsyth 	case '(':
633*74a4d8c2SCharles.Forsyth 		rtok = relex();
634*74a4d8c2SCharles.Forsyth 		if (rtok == ')') {	/* special pleading for () */
635*74a4d8c2SCharles.Forsyth 			rtok = relex();
636*74a4d8c2SCharles.Forsyth 			return unary(op2(CCL, NIL, (Node *) tostring("")));
637*74a4d8c2SCharles.Forsyth 		}
638*74a4d8c2SCharles.Forsyth 		np = regexp();
639*74a4d8c2SCharles.Forsyth 		if (rtok == ')') {
640*74a4d8c2SCharles.Forsyth 			rtok = relex();
641*74a4d8c2SCharles.Forsyth 			return (unary(np));
642*74a4d8c2SCharles.Forsyth 		}
643*74a4d8c2SCharles.Forsyth 		else
644*74a4d8c2SCharles.Forsyth 			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
645*74a4d8c2SCharles.Forsyth 	default:
646*74a4d8c2SCharles.Forsyth 		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
647*74a4d8c2SCharles.Forsyth 	}
648*74a4d8c2SCharles.Forsyth 	return 0;	/*NOTREACHED*/
649*74a4d8c2SCharles.Forsyth }
650*74a4d8c2SCharles.Forsyth 
concat(Node * np)651*74a4d8c2SCharles.Forsyth Node *concat(Node *np)
652*74a4d8c2SCharles.Forsyth {
653*74a4d8c2SCharles.Forsyth 	switch (rtok) {
654*74a4d8c2SCharles.Forsyth 	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
655*74a4d8c2SCharles.Forsyth 		return (concat(op2(CAT, np, primary())));
656*74a4d8c2SCharles.Forsyth 	}
657*74a4d8c2SCharles.Forsyth 	return (np);
658*74a4d8c2SCharles.Forsyth }
659*74a4d8c2SCharles.Forsyth 
alt(Node * np)660*74a4d8c2SCharles.Forsyth Node *alt(Node *np)
661*74a4d8c2SCharles.Forsyth {
662*74a4d8c2SCharles.Forsyth 	if (rtok == OR) {
663*74a4d8c2SCharles.Forsyth 		rtok = relex();
664*74a4d8c2SCharles.Forsyth 		return (alt(op2(OR, np, concat(primary()))));
665*74a4d8c2SCharles.Forsyth 	}
666*74a4d8c2SCharles.Forsyth 	return (np);
667*74a4d8c2SCharles.Forsyth }
668*74a4d8c2SCharles.Forsyth 
unary(Node * np)669*74a4d8c2SCharles.Forsyth Node *unary(Node *np)
670*74a4d8c2SCharles.Forsyth {
671*74a4d8c2SCharles.Forsyth 	switch (rtok) {
672*74a4d8c2SCharles.Forsyth 	case STAR:
673*74a4d8c2SCharles.Forsyth 		rtok = relex();
674*74a4d8c2SCharles.Forsyth 		return (unary(op2(STAR, np, NIL)));
675*74a4d8c2SCharles.Forsyth 	case PLUS:
676*74a4d8c2SCharles.Forsyth 		rtok = relex();
677*74a4d8c2SCharles.Forsyth 		return (unary(op2(PLUS, np, NIL)));
678*74a4d8c2SCharles.Forsyth 	case QUEST:
679*74a4d8c2SCharles.Forsyth 		rtok = relex();
680*74a4d8c2SCharles.Forsyth 		return (unary(op2(QUEST, np, NIL)));
681*74a4d8c2SCharles.Forsyth 	default:
682*74a4d8c2SCharles.Forsyth 		return (np);
683*74a4d8c2SCharles.Forsyth 	}
684*74a4d8c2SCharles.Forsyth }
685*74a4d8c2SCharles.Forsyth 
relex(void)686*74a4d8c2SCharles.Forsyth int relex(void)		/* lexical analyzer for reparse */
687*74a4d8c2SCharles.Forsyth {
688*74a4d8c2SCharles.Forsyth 	int c, n;
689*74a4d8c2SCharles.Forsyth 	int cflag;
690*74a4d8c2SCharles.Forsyth 	static uschar *buf = 0;
691*74a4d8c2SCharles.Forsyth 	static int bufsz = 100;
692*74a4d8c2SCharles.Forsyth 	uschar *bp;
693*74a4d8c2SCharles.Forsyth 
694*74a4d8c2SCharles.Forsyth 	switch (c = *prestr++) {
695*74a4d8c2SCharles.Forsyth 	case '|': return OR;
696*74a4d8c2SCharles.Forsyth 	case '*': return STAR;
697*74a4d8c2SCharles.Forsyth 	case '+': return PLUS;
698*74a4d8c2SCharles.Forsyth 	case '?': return QUEST;
699*74a4d8c2SCharles.Forsyth 	case '.': return DOT;
700*74a4d8c2SCharles.Forsyth 	case '\0': prestr--; return '\0';
701*74a4d8c2SCharles.Forsyth 	case '^':
702*74a4d8c2SCharles.Forsyth 	case '$':
703*74a4d8c2SCharles.Forsyth 	case '(':
704*74a4d8c2SCharles.Forsyth 	case ')':
705*74a4d8c2SCharles.Forsyth 		return c;
706*74a4d8c2SCharles.Forsyth 	case '\\':
707*74a4d8c2SCharles.Forsyth 		rlxval = quoted((char **) &prestr);
708*74a4d8c2SCharles.Forsyth 		return CHAR;
709*74a4d8c2SCharles.Forsyth 	default:
710*74a4d8c2SCharles.Forsyth 		rlxval = c;
711*74a4d8c2SCharles.Forsyth 		return CHAR;
712*74a4d8c2SCharles.Forsyth 	case '[':
713*74a4d8c2SCharles.Forsyth 		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
714*74a4d8c2SCharles.Forsyth 			FATAL("out of space in reg expr %.10s..", lastre);
715*74a4d8c2SCharles.Forsyth 		bp = buf;
716*74a4d8c2SCharles.Forsyth 		if (*prestr == '^') {
717*74a4d8c2SCharles.Forsyth 			cflag = 1;
718*74a4d8c2SCharles.Forsyth 			prestr++;
719*74a4d8c2SCharles.Forsyth 		}
720*74a4d8c2SCharles.Forsyth 		else
721*74a4d8c2SCharles.Forsyth 			cflag = 0;
722*74a4d8c2SCharles.Forsyth 		n = 2 * strlen(prestr)+1;
723*74a4d8c2SCharles.Forsyth 		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
724*74a4d8c2SCharles.Forsyth 			FATAL("out of space for reg expr %.10s...", lastre);
725*74a4d8c2SCharles.Forsyth 		for (; ; ) {
726*74a4d8c2SCharles.Forsyth 			if ((c = *prestr++) == '\\') {
727*74a4d8c2SCharles.Forsyth 				*bp++ = '\\';
728*74a4d8c2SCharles.Forsyth 				if ((c = *prestr++) == '\0')
729*74a4d8c2SCharles.Forsyth 					FATAL("nonterminated character class %.20s...", lastre);
730*74a4d8c2SCharles.Forsyth 				*bp++ = c;
731*74a4d8c2SCharles.Forsyth 			/* } else if (c == '\n') { */
732*74a4d8c2SCharles.Forsyth 			/* 	FATAL("newline in character class %.20s...", lastre); */
733*74a4d8c2SCharles.Forsyth 			} else if (c == '\0') {
734*74a4d8c2SCharles.Forsyth 				FATAL("nonterminated character class %.20s", lastre);
735*74a4d8c2SCharles.Forsyth 			} else if (bp == buf) {	/* 1st char is special */
736*74a4d8c2SCharles.Forsyth 				*bp++ = c;
737*74a4d8c2SCharles.Forsyth 			} else if (c == ']') {
738*74a4d8c2SCharles.Forsyth 				*bp++ = 0;
739*74a4d8c2SCharles.Forsyth 				rlxstr = (uschar *) tostring((char *) buf);
740*74a4d8c2SCharles.Forsyth 				if (cflag == 0)
741*74a4d8c2SCharles.Forsyth 					return CCL;
742*74a4d8c2SCharles.Forsyth 				else
743*74a4d8c2SCharles.Forsyth 					return NCCL;
744*74a4d8c2SCharles.Forsyth 			} else
745*74a4d8c2SCharles.Forsyth 				*bp++ = c;
746*74a4d8c2SCharles.Forsyth 		}
747*74a4d8c2SCharles.Forsyth 	}
748*74a4d8c2SCharles.Forsyth }
749*74a4d8c2SCharles.Forsyth 
cgoto(fa * f,int s,int c)750*74a4d8c2SCharles.Forsyth int cgoto(fa *f, int s, int c)
751*74a4d8c2SCharles.Forsyth {
752*74a4d8c2SCharles.Forsyth 	int i, j, k;
753*74a4d8c2SCharles.Forsyth 	int *p, *q;
754*74a4d8c2SCharles.Forsyth 
755*74a4d8c2SCharles.Forsyth 	if (c < 0 || c > 255)
756*74a4d8c2SCharles.Forsyth 		FATAL("can't happen: neg char %d in cgoto", c);
757*74a4d8c2SCharles.Forsyth 	while (f->accept >= maxsetvec) {	/* guessing here! */
758*74a4d8c2SCharles.Forsyth 		maxsetvec *= 4;
759*74a4d8c2SCharles.Forsyth 		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
760*74a4d8c2SCharles.Forsyth 		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
761*74a4d8c2SCharles.Forsyth 		if (setvec == 0 || tmpset == 0)
762*74a4d8c2SCharles.Forsyth 			overflo("out of space in cgoto()");
763*74a4d8c2SCharles.Forsyth 	}
764*74a4d8c2SCharles.Forsyth 	for (i = 0; i <= f->accept; i++)
765*74a4d8c2SCharles.Forsyth 		setvec[i] = 0;
766*74a4d8c2SCharles.Forsyth 	setcnt = 0;
767*74a4d8c2SCharles.Forsyth 	/* compute positions of gototab[s,c] into setvec */
768*74a4d8c2SCharles.Forsyth 	p = f->posns[s];
769*74a4d8c2SCharles.Forsyth 	for (i = 1; i <= *p; i++) {
770*74a4d8c2SCharles.Forsyth 		if ((k = f->re[p[i]].ltype) != FINAL) {
771*74a4d8c2SCharles.Forsyth 			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
772*74a4d8c2SCharles.Forsyth 			 || (k == DOT && c != 0 && c != HAT)
773*74a4d8c2SCharles.Forsyth 			 || (k == ALL && c != 0)
774*74a4d8c2SCharles.Forsyth 			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
775*74a4d8c2SCharles.Forsyth 			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
776*74a4d8c2SCharles.Forsyth 				q = f->re[p[i]].lfollow;
777*74a4d8c2SCharles.Forsyth 				for (j = 1; j <= *q; j++) {
778*74a4d8c2SCharles.Forsyth 					if (q[j] >= maxsetvec) {
779*74a4d8c2SCharles.Forsyth 						maxsetvec *= 4;
780*74a4d8c2SCharles.Forsyth 						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
781*74a4d8c2SCharles.Forsyth 						tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
782*74a4d8c2SCharles.Forsyth 						if (setvec == 0 || tmpset == 0)
783*74a4d8c2SCharles.Forsyth 							overflo("cgoto overflow");
784*74a4d8c2SCharles.Forsyth 					}
785*74a4d8c2SCharles.Forsyth 					if (setvec[q[j]] == 0) {
786*74a4d8c2SCharles.Forsyth 						setcnt++;
787*74a4d8c2SCharles.Forsyth 						setvec[q[j]] = 1;
788*74a4d8c2SCharles.Forsyth 					}
789*74a4d8c2SCharles.Forsyth 				}
790*74a4d8c2SCharles.Forsyth 			}
791*74a4d8c2SCharles.Forsyth 		}
792*74a4d8c2SCharles.Forsyth 	}
793*74a4d8c2SCharles.Forsyth 	/* determine if setvec is a previous state */
794*74a4d8c2SCharles.Forsyth 	tmpset[0] = setcnt;
795*74a4d8c2SCharles.Forsyth 	j = 1;
796*74a4d8c2SCharles.Forsyth 	for (i = f->accept; i >= 0; i--)
797*74a4d8c2SCharles.Forsyth 		if (setvec[i]) {
798*74a4d8c2SCharles.Forsyth 			tmpset[j++] = i;
799*74a4d8c2SCharles.Forsyth 		}
800*74a4d8c2SCharles.Forsyth 	/* tmpset == previous state? */
801*74a4d8c2SCharles.Forsyth 	for (i = 1; i <= f->curstat; i++) {
802*74a4d8c2SCharles.Forsyth 		p = f->posns[i];
803*74a4d8c2SCharles.Forsyth 		if ((k = tmpset[0]) != p[0])
804*74a4d8c2SCharles.Forsyth 			goto different;
805*74a4d8c2SCharles.Forsyth 		for (j = 1; j <= k; j++)
806*74a4d8c2SCharles.Forsyth 			if (tmpset[j] != p[j])
807*74a4d8c2SCharles.Forsyth 				goto different;
808*74a4d8c2SCharles.Forsyth 		/* setvec is state i */
809*74a4d8c2SCharles.Forsyth 		f->gototab[s][c] = i;
810*74a4d8c2SCharles.Forsyth 		return i;
811*74a4d8c2SCharles.Forsyth 	  different:;
812*74a4d8c2SCharles.Forsyth 	}
813*74a4d8c2SCharles.Forsyth 
814*74a4d8c2SCharles.Forsyth 	/* add tmpset to current set of states */
815*74a4d8c2SCharles.Forsyth 	if (f->curstat >= NSTATES-1) {
816*74a4d8c2SCharles.Forsyth 		f->curstat = 2;
817*74a4d8c2SCharles.Forsyth 		f->reset = 1;
818*74a4d8c2SCharles.Forsyth 		for (i = 2; i < NSTATES; i++)
819*74a4d8c2SCharles.Forsyth 			xfree(f->posns[i]);
820*74a4d8c2SCharles.Forsyth 	} else
821*74a4d8c2SCharles.Forsyth 		++(f->curstat);
822*74a4d8c2SCharles.Forsyth 	for (i = 0; i < NCHARS; i++)
823*74a4d8c2SCharles.Forsyth 		f->gototab[f->curstat][i] = 0;
824*74a4d8c2SCharles.Forsyth 	xfree(f->posns[f->curstat]);
825*74a4d8c2SCharles.Forsyth 	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
826*74a4d8c2SCharles.Forsyth 		overflo("out of space in cgoto");
827*74a4d8c2SCharles.Forsyth 
828*74a4d8c2SCharles.Forsyth 	f->posns[f->curstat] = p;
829*74a4d8c2SCharles.Forsyth 	f->gototab[s][c] = f->curstat;
830*74a4d8c2SCharles.Forsyth 	for (i = 0; i <= setcnt; i++)
831*74a4d8c2SCharles.Forsyth 		p[i] = tmpset[i];
832*74a4d8c2SCharles.Forsyth 	if (setvec[f->accept])
833*74a4d8c2SCharles.Forsyth 		f->out[f->curstat] = 1;
834*74a4d8c2SCharles.Forsyth 	else
835*74a4d8c2SCharles.Forsyth 		f->out[f->curstat] = 0;
836*74a4d8c2SCharles.Forsyth 	return f->curstat;
837*74a4d8c2SCharles.Forsyth }
838*74a4d8c2SCharles.Forsyth 
839*74a4d8c2SCharles.Forsyth 
freefa(fa * f)840*74a4d8c2SCharles.Forsyth void freefa(fa *f)	/* free a finite automaton */
841*74a4d8c2SCharles.Forsyth {
842*74a4d8c2SCharles.Forsyth 	int i;
843*74a4d8c2SCharles.Forsyth 
844*74a4d8c2SCharles.Forsyth 	if (f == NULL)
845*74a4d8c2SCharles.Forsyth 		return;
846*74a4d8c2SCharles.Forsyth 	for (i = 0; i <= f->curstat; i++)
847*74a4d8c2SCharles.Forsyth 		xfree(f->posns[i]);
848*74a4d8c2SCharles.Forsyth 	for (i = 0; i <= f->accept; i++) {
849*74a4d8c2SCharles.Forsyth 		xfree(f->re[i].lfollow);
850*74a4d8c2SCharles.Forsyth 		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
851*74a4d8c2SCharles.Forsyth 			xfree((f->re[i].lval.np));
852*74a4d8c2SCharles.Forsyth 	}
853*74a4d8c2SCharles.Forsyth 	xfree(f->restr);
854*74a4d8c2SCharles.Forsyth 	xfree(f);
855*74a4d8c2SCharles.Forsyth }
856