xref: /openbsd-src/usr.bin/awk/b.c (revision 8445c53715e7030056b779e8ab40efb7820981f2)
1 /*	$OpenBSD: b.c,v 1.10 2001/09/08 00:12:40 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 /* lasciate ogne speranza, voi ch'entrate. */
27 
28 #define	DEBUG
29 
30 #include <ctype.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <stdlib.h>
34 #include "awk.h"
35 #include "ytab.h"
36 
37 #define	HAT	(NCHARS-2)	/* matches ^ in regular expr */
38 				/* NCHARS is 2**n */
39 #define MAXLIN 22
40 
41 #define type(v)		(v)->nobj	/* badly overloaded here */
42 #define info(v)		(v)->ntype	/* badly overloaded here */
43 #define left(v)		(v)->narg[0]
44 #define right(v)	(v)->narg[1]
45 #define parent(v)	(v)->nnext
46 
47 #define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48 #define UNARY	case STAR: case PLUS: case QUEST:
49 
50 /* encoding in tree Nodes:
51 	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
52 		left is index, right contains value or pointer to value
53 	unary (STAR, PLUS, QUEST): left is child, right is null
54 	binary (CAT, OR): left and right are children
55 	parent contains pointer to parent
56 */
57 
58 
59 int	*setvec;
60 int	*tmpset;
61 int	maxsetvec = 0;
62 
63 int	rtok;		/* next token in current re */
64 int	rlxval;
65 static uschar	*rlxstr;
66 static uschar	*prestr;	/* current position in current re */
67 static uschar	*lastre;	/* origin of last re */
68 
69 static	int setcnt;
70 static	int poscnt;
71 
72 char	*patbeg;
73 int	patlen;
74 
75 #define	NFA	20	/* cache this many dynamic fa's */
76 fa	*fatab[NFA];
77 int	nfatab	= 0;	/* entries in fatab */
78 
79 fa *makedfa(char *s, int anchor)	/* returns dfa for reg expr s */
80 {
81 	int i, use, nuse;
82 	fa *pfa;
83 	static int now = 1;
84 
85 	if (setvec == 0) {	/* first time through any RE */
86 		maxsetvec = MAXLIN;
87 		setvec = (int *) malloc(maxsetvec * sizeof(int));
88 		tmpset = (int *) malloc(maxsetvec * sizeof(int));
89 		if (setvec == 0 || tmpset == 0)
90 			overflo("out of space initializing makedfa");
91 	}
92 
93 	if (compile_time)	/* a constant for sure */
94 		return mkdfa(s, anchor);
95 	for (i = 0; i < nfatab; i++)	/* is it there already? */
96 		if (fatab[i]->anchor == anchor
97 		  && strcmp(fatab[i]->restr, s) == 0) {
98 			fatab[i]->use = now++;
99 			return fatab[i];
100 		}
101 	pfa = mkdfa(s, anchor);
102 	if (nfatab < NFA) {	/* room for another */
103 		fatab[nfatab] = pfa;
104 		fatab[nfatab]->use = now++;
105 		nfatab++;
106 		return pfa;
107 	}
108 	use = fatab[0]->use;	/* replace least-recently used */
109 	nuse = 0;
110 	for (i = 1; i < nfatab; i++)
111 		if (fatab[i]->use < use) {
112 			use = fatab[i]->use;
113 			nuse = i;
114 		}
115 	freefa(fatab[nuse]);
116 	fatab[nuse] = pfa;
117 	pfa->use = now++;
118 	return pfa;
119 }
120 
121 fa *mkdfa(char *s, int anchor)	/* does the real work of making a dfa */
122 				/* anchor = 1 for anchored matches, else 0 */
123 {
124 	Node *p, *p1;
125 	fa *f;
126 
127 	p = reparse(s);
128 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
129 		/* put ALL STAR in front of reg.  exp. */
130 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
131 		/* put FINAL after reg.  exp. */
132 
133 	poscnt = 0;
134 	penter(p1);	/* enter parent pointers and leaf indices */
135 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
136 		overflo("out of space for fa");
137 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
138 	cfoll(f, p1);	/* set up follow sets */
139 	freetr(p1);
140 	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
141 			overflo("out of space in makedfa");
142 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
143 		overflo("out of space in makedfa");
144 	*f->posns[1] = 0;
145 	f->initstat = makeinit(f, anchor);
146 	f->anchor = anchor;
147 	f->restr = (uschar *) tostring(s);
148 	return f;
149 }
150 
151 int makeinit(fa *f, int anchor)
152 {
153 	int i, k;
154 
155 	f->curstat = 2;
156 	f->out[2] = 0;
157 	f->reset = 0;
158 	k = *(f->re[0].lfollow);
159 	xfree(f->posns[2]);
160 	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
161 		overflo("out of space in makeinit");
162 	for (i=0; i <= k; i++) {
163 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
164 	}
165 	if ((f->posns[2])[1] == f->accept)
166 		f->out[2] = 1;
167 	for (i=0; i < NCHARS; i++)
168 		f->gototab[2][i] = 0;
169 	f->curstat = cgoto(f, 2, HAT);
170 	if (anchor) {
171 		*f->posns[2] = k-1;	/* leave out position 0 */
172 		for (i=0; i < k; i++) {
173 			(f->posns[0])[i] = (f->posns[2])[i];
174 		}
175 
176 		f->out[0] = f->out[2];
177 		if (f->curstat != 2)
178 			--(*f->posns[f->curstat]);
179 	}
180 	return f->curstat;
181 }
182 
183 void penter(Node *p)	/* set up parent pointers and leaf indices */
184 {
185 	switch (type(p)) {
186 	LEAF
187 		info(p) = poscnt;
188 		poscnt++;
189 		break;
190 	UNARY
191 		penter(left(p));
192 		parent(left(p)) = p;
193 		break;
194 	case CAT:
195 	case OR:
196 		penter(left(p));
197 		penter(right(p));
198 		parent(left(p)) = p;
199 		parent(right(p)) = p;
200 		break;
201 	default:	/* can't happen */
202 		FATAL("can't happen: unknown type %d in penter", type(p));
203 		break;
204 	}
205 }
206 
207 void freetr(Node *p)	/* free parse tree */
208 {
209 	switch (type(p)) {
210 	LEAF
211 		xfree(p);
212 		break;
213 	UNARY
214 		freetr(left(p));
215 		xfree(p);
216 		break;
217 	case CAT:
218 	case OR:
219 		freetr(left(p));
220 		freetr(right(p));
221 		xfree(p);
222 		break;
223 	default:	/* can't happen */
224 		FATAL("can't happen: unknown type %d in freetr", type(p));
225 		break;
226 	}
227 }
228 
229 /* in the parsing of regular expressions, metacharacters like . have */
230 /* to be seen literally;  \056 is not a metacharacter. */
231 
232 int hexstr(char **pp)	/* find and eval hex string at pp, return new p */
233 {			/* only pick up one 8-bit byte (2 chars) */
234 	uschar *p;
235 	int n = 0;
236 	int i;
237 
238 	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
239 		if (isdigit(*p))
240 			n = 16 * n + *p - '0';
241 		else if (*p >= 'a' && *p <= 'f')
242 			n = 16 * n + *p - 'a' + 10;
243 		else if (*p >= 'A' && *p <= 'F')
244 			n = 16 * n + *p - 'A' + 10;
245 	}
246 	*pp = (char *) p;
247 	return n;
248 }
249 
250 #define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
251 
252 int quoted(char **pp)	/* pick up next thing after a \\ */
253 			/* and increment *pp */
254 {
255 	char *p = *pp;
256 	int c;
257 
258 	if ((c = *p++) == 't')
259 		c = '\t';
260 	else if (c == 'n')
261 		c = '\n';
262 	else if (c == 'f')
263 		c = '\f';
264 	else if (c == 'r')
265 		c = '\r';
266 	else if (c == 'b')
267 		c = '\b';
268 	else if (c == '\\')
269 		c = '\\';
270 	else if (c == 'x') {	/* hexadecimal goo follows */
271 		c = hexstr(&p);	/* this adds a null if number is invalid */
272 	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
273 		int n = c - '0';
274 		if (isoctdigit(*p)) {
275 			n = 8 * n + *p++ - '0';
276 			if (isoctdigit(*p))
277 				n = 8 * n + *p++ - '0';
278 		}
279 		c = n;
280 	} /* else */
281 		/* c = c; */
282 	*pp = p;
283 	return c;
284 }
285 
286 char *cclenter(char *argp)	/* add a character class */
287 {
288 	int i, c, c2;
289 	uschar *p = (uschar *) argp;
290 	uschar *op, *bp;
291 	static uschar *buf = 0;
292 	static int bufsz = 100;
293 
294 	op = p;
295 	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
296 		FATAL("out of space for character class [%.10s...] 1", p);
297 	bp = buf;
298 	for (i = 0; (c = *p++) != 0; ) {
299 		if (c == '\\') {
300 			c = quoted((char **) &p);
301 		} else if (c == '-' && i > 0 && bp[-1] != 0) {
302 			if (*p != 0) {
303 				c = bp[-1];
304 				c2 = *p++;
305 				if (c2 == '\\')
306 					c2 = quoted((char **) &p);
307 				if (c > c2) {	/* empty; ignore */
308 					bp--;
309 					i--;
310 					continue;
311 				}
312 				while (c < c2) {
313 					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
314 						FATAL("out of space for character class [%.10s...] 2", p);
315 					*bp++ = ++c;
316 					i++;
317 				}
318 				continue;
319 			}
320 		}
321 		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
322 			FATAL("out of space for character class [%.10s...] 3", p);
323 		*bp++ = c;
324 		i++;
325 	}
326 	*bp = 0;
327 	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
328 	xfree(op);
329 	return (char *) tostring((char *) buf);
330 }
331 
332 void overflo(char *s)
333 {
334 	FATAL("regular expression too big: %.30s...", s);
335 }
336 
337 void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
338 {
339 	int i;
340 	int *p;
341 
342 	switch (type(v)) {
343 	LEAF
344 		f->re[info(v)].ltype = type(v);
345 		f->re[info(v)].lval.np = right(v);
346 		while (f->accept >= maxsetvec) {	/* guessing here! */
347 			maxsetvec *= 4;
348 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
349 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
350 			if (setvec == 0 || tmpset == 0)
351 				overflo("out of space in cfoll()");
352 		}
353 		for (i = 0; i <= f->accept; i++)
354 			setvec[i] = 0;
355 		setcnt = 0;
356 		follow(v);	/* computes setvec and setcnt */
357 		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
358 			overflo("out of space building follow set");
359 		f->re[info(v)].lfollow = p;
360 		*p = setcnt;
361 		for (i = f->accept; i >= 0; i--)
362 			if (setvec[i] == 1)
363 				*++p = i;
364 		break;
365 	UNARY
366 		cfoll(f,left(v));
367 		break;
368 	case CAT:
369 	case OR:
370 		cfoll(f,left(v));
371 		cfoll(f,right(v));
372 		break;
373 	default:	/* can't happen */
374 		FATAL("can't happen: unknown type %d in cfoll", type(v));
375 	}
376 }
377 
378 int first(Node *p)	/* collects initially active leaves of p into setvec */
379 			/* returns 1 if p matches empty string */
380 {
381 	int b, lp;
382 
383 	switch (type(p)) {
384 	LEAF
385 		lp = info(p);	/* look for high-water mark of subscripts */
386 		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
387 			maxsetvec *= 4;
388 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
389 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
390 			if (setvec == 0 || tmpset == 0)
391 				overflo("out of space in first()");
392 		}
393 		if (setvec[lp] != 1) {
394 			setvec[lp] = 1;
395 			setcnt++;
396 		}
397 		if (type(p) == CCL && (*(char *) right(p)) == '\0')
398 			return(0);		/* empty CCL */
399 		else return(1);
400 	case PLUS:
401 		if (first(left(p)) == 0) return(0);
402 		return(1);
403 	case STAR:
404 	case QUEST:
405 		first(left(p));
406 		return(0);
407 	case CAT:
408 		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
409 		return(1);
410 	case OR:
411 		b = first(right(p));
412 		if (first(left(p)) == 0 || b == 0) return(0);
413 		return(1);
414 	}
415 	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
416 	return(-1);
417 }
418 
419 void follow(Node *v)	/* collects leaves that can follow v into setvec */
420 {
421 	Node *p;
422 
423 	if (type(v) == FINAL)
424 		return;
425 	p = parent(v);
426 	switch (type(p)) {
427 	case STAR:
428 	case PLUS:
429 		first(v);
430 		follow(p);
431 		return;
432 
433 	case OR:
434 	case QUEST:
435 		follow(p);
436 		return;
437 
438 	case CAT:
439 		if (v == left(p)) {	/* v is left child of p */
440 			if (first(right(p)) == 0) {
441 				follow(p);
442 				return;
443 			}
444 		} else		/* v is right child */
445 			follow(p);
446 		return;
447 	}
448 }
449 
450 int member(int c, char *sarg)	/* is c in s? */
451 {
452 	uschar *s = (uschar *) sarg;
453 
454 	while (*s)
455 		if (c == *s++)
456 			return(1);
457 	return(0);
458 }
459 
460 int match(fa *f, char *p0)	/* shortest match ? */
461 {
462 	int s, ns;
463 	uschar *p = (uschar *) p0;
464 
465 	s = f->reset ? makeinit(f,0) : f->initstat;
466 	if (f->out[s])
467 		return(1);
468 	do {
469 		if ((ns = f->gototab[s][*p]) != 0)
470 			s = ns;
471 		else
472 			s = cgoto(f, s, *p);
473 		if (f->out[s])
474 			return(1);
475 	} while (*p++ != 0);
476 	return(0);
477 }
478 
479 int pmatch(fa *f, char *p0)	/* longest match, for sub */
480 {
481 	int s, ns;
482 	uschar *p = (uschar *) p0;
483 	uschar *q;
484 	int i, k;
485 
486 	s = f->reset ? makeinit(f,1) : f->initstat;
487 	patbeg = (char *) p;
488 	patlen = -1;
489 	do {
490 		q = p;
491 		do {
492 			if (f->out[s])		/* final state */
493 				patlen = q-p;
494 			if ((ns = f->gototab[s][*q]) != 0)
495 				s = ns;
496 			else
497 				s = cgoto(f, s, *q);
498 			if (s == 1) {	/* no transition */
499 				if (patlen >= 0) {
500 					patbeg = (char *) p;
501 					return(1);
502 				}
503 				else
504 					goto nextin;	/* no match */
505 			}
506 		} while (*q++ != 0);
507 		if (f->out[s])
508 			patlen = q-p-1;	/* don't count $ */
509 		if (patlen >= 0) {
510 			patbeg = (char *) p;
511 			return(1);
512 		}
513 	nextin:
514 		s = 2;
515 		if (f->reset) {
516 			for (i = 2; i <= f->curstat; i++)
517 				xfree(f->posns[i]);
518 			k = *f->posns[0];
519 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
520 				overflo("out of space in pmatch");
521 			for (i = 0; i <= k; i++)
522 				(f->posns[2])[i] = (f->posns[0])[i];
523 			f->initstat = f->curstat = 2;
524 			f->out[2] = f->out[0];
525 			for (i = 0; i < NCHARS; i++)
526 				f->gototab[2][i] = 0;
527 		}
528 	} while (*p++ != 0);
529 	return (0);
530 }
531 
532 int nematch(fa *f, char *p0)	/* non-empty match, for sub */
533 {
534 	int s, ns;
535 	uschar *p = (uschar *) p0;
536 	uschar *q;
537 	int i, k;
538 
539 	s = f->reset ? makeinit(f,1) : f->initstat;
540 	patlen = -1;
541 	while (*p) {
542 		q = p;
543 		do {
544 			if (f->out[s])		/* final state */
545 				patlen = q-p;
546 			if ((ns = f->gototab[s][*q]) != 0)
547 				s = ns;
548 			else
549 				s = cgoto(f, s, *q);
550 			if (s == 1) {	/* no transition */
551 				if (patlen > 0) {
552 					patbeg = (char *) p;
553 					return(1);
554 				} else
555 					goto nnextin;	/* no nonempty match */
556 			}
557 		} while (*q++ != 0);
558 		if (f->out[s])
559 			patlen = q-p-1;	/* don't count $ */
560 		if (patlen > 0 ) {
561 			patbeg = (char *) p;
562 			return(1);
563 		}
564 	nnextin:
565 		s = 2;
566 		if (f->reset) {
567 			for (i = 2; i <= f->curstat; i++)
568 				xfree(f->posns[i]);
569 			k = *f->posns[0];
570 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
571 				overflo("out of state space");
572 			for (i = 0; i <= k; i++)
573 				(f->posns[2])[i] = (f->posns[0])[i];
574 			f->initstat = f->curstat = 2;
575 			f->out[2] = f->out[0];
576 			for (i = 0; i < NCHARS; i++)
577 				f->gototab[2][i] = 0;
578 		}
579 		p++;
580 	}
581 	return (0);
582 }
583 
584 Node *reparse(char *p)	/* parses regular expression pointed to by p */
585 {			/* uses relex() to scan regular expression */
586 	Node *np;
587 
588 	dprintf( ("reparse <%s>\n", p) );
589 	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
590 	rtok = relex();
591 	if (rtok == '\0')
592 		FATAL("empty regular expression");
593 	np = regexp();
594 	if (rtok != '\0')
595 		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
596 	return(np);
597 }
598 
599 Node *regexp(void)	/* top-level parse of reg expr */
600 {
601 	return (alt(concat(primary())));
602 }
603 
604 Node *primary(void)
605 {
606 	Node *np;
607 
608 	switch (rtok) {
609 	case CHAR:
610 		np = op2(CHAR, NIL, itonp(rlxval));
611 		rtok = relex();
612 		return (unary(np));
613 	case ALL:
614 		rtok = relex();
615 		return (unary(op2(ALL, NIL, NIL)));
616 	case DOT:
617 		rtok = relex();
618 		return (unary(op2(DOT, NIL, NIL)));
619 	case CCL:
620 		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
621 		rtok = relex();
622 		return (unary(np));
623 	case NCCL:
624 		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
625 		rtok = relex();
626 		return (unary(np));
627 	case '^':
628 		rtok = relex();
629 		return (unary(op2(CHAR, NIL, itonp(HAT))));
630 	case '$':
631 		rtok = relex();
632 		return (unary(op2(CHAR, NIL, NIL)));
633 	case '(':
634 		rtok = relex();
635 		if (rtok == ')') {	/* special pleading for () */
636 			rtok = relex();
637 			return unary(op2(CCL, NIL, (Node *) tostring("")));
638 		}
639 		np = regexp();
640 		if (rtok == ')') {
641 			rtok = relex();
642 			return (unary(np));
643 		}
644 		else
645 			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
646 	default:
647 		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
648 	}
649 	return 0;	/*NOTREACHED*/
650 }
651 
652 Node *concat(Node *np)
653 {
654 	switch (rtok) {
655 	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
656 		return (concat(op2(CAT, np, primary())));
657 	}
658 	return (np);
659 }
660 
661 Node *alt(Node *np)
662 {
663 	if (rtok == OR) {
664 		rtok = relex();
665 		return (alt(op2(OR, np, concat(primary()))));
666 	}
667 	return (np);
668 }
669 
670 Node *unary(Node *np)
671 {
672 	switch (rtok) {
673 	case STAR:
674 		rtok = relex();
675 		return (unary(op2(STAR, np, NIL)));
676 	case PLUS:
677 		rtok = relex();
678 		return (unary(op2(PLUS, np, NIL)));
679 	case QUEST:
680 		rtok = relex();
681 		return (unary(op2(QUEST, np, NIL)));
682 	default:
683 		return (np);
684 	}
685 }
686 
687 int relex(void)		/* lexical analyzer for reparse */
688 {
689 	int c, n;
690 	int cflag;
691 	static uschar *buf = 0;
692 	static int bufsz = 100;
693 	uschar *bp;
694 
695 	switch (c = *prestr++) {
696 	case '|': return OR;
697 	case '*': return STAR;
698 	case '+': return PLUS;
699 	case '?': return QUEST;
700 	case '.': return DOT;
701 	case '\0': prestr--; return '\0';
702 	case '^':
703 	case '$':
704 	case '(':
705 	case ')':
706 		return c;
707 	case '\\':
708 		rlxval = quoted((char **) &prestr);
709 		return CHAR;
710 	default:
711 		rlxval = c;
712 		return CHAR;
713 	case '[':
714 		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
715 			FATAL("out of space in reg expr %.10s..", lastre);
716 		bp = buf;
717 		if (*prestr == '^') {
718 			cflag = 1;
719 			prestr++;
720 		}
721 		else
722 			cflag = 0;
723 		n = 2 * strlen(prestr)+1;
724 		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
725 			FATAL("out of space for reg expr %.10s...", lastre);
726 		for (; ; ) {
727 			if ((c = *prestr++) == '\\') {
728 				*bp++ = '\\';
729 				if ((c = *prestr++) == '\0')
730 					FATAL("nonterminated character class %.20s...", lastre);
731 				*bp++ = c;
732 			/* } else if (c == '\n') { */
733 			/* 	FATAL("newline in character class %.20s...", lastre); */
734 			} else if (c == '\0') {
735 				FATAL("nonterminated character class %.20s", lastre);
736 			} else if (bp == buf) {	/* 1st char is special */
737 				*bp++ = c;
738 			} else if (c == ']') {
739 				*bp++ = 0;
740 				rlxstr = (uschar *) tostring((char *) buf);
741 				if (cflag == 0)
742 					return CCL;
743 				else
744 					return NCCL;
745 			} else
746 				*bp++ = c;
747 		}
748 	}
749 }
750 
751 int cgoto(fa *f, int s, int c)
752 {
753 	int i, j, k;
754 	int *p, *q;
755 
756 	if (c < 0 || c > 255)
757 		FATAL("can't happen: neg char %d in cgoto", c);
758 	while (f->accept >= maxsetvec) {	/* guessing here! */
759 		maxsetvec *= 4;
760 		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
761 		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
762 		if (setvec == 0 || tmpset == 0)
763 			overflo("out of space in cgoto()");
764 	}
765 	for (i = 0; i <= f->accept; i++)
766 		setvec[i] = 0;
767 	setcnt = 0;
768 	/* compute positions of gototab[s,c] into setvec */
769 	p = f->posns[s];
770 	for (i = 1; i <= *p; i++) {
771 		if ((k = f->re[p[i]].ltype) != FINAL) {
772 			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
773 			 || (k == DOT && c != 0 && c != HAT)
774 			 || (k == ALL && c != 0)
775 			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
776 			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
777 				q = f->re[p[i]].lfollow;
778 				for (j = 1; j <= *q; j++) {
779 					if (q[j] >= maxsetvec) {
780 						maxsetvec *= 4;
781 						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
782 						tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
783 						if (setvec == 0 || tmpset == 0)
784 							overflo("cgoto overflow");
785 					}
786 					if (setvec[q[j]] == 0) {
787 						setcnt++;
788 						setvec[q[j]] = 1;
789 					}
790 				}
791 			}
792 		}
793 	}
794 	/* determine if setvec is a previous state */
795 	tmpset[0] = setcnt;
796 	j = 1;
797 	for (i = f->accept; i >= 0; i--)
798 		if (setvec[i]) {
799 			tmpset[j++] = i;
800 		}
801 	/* tmpset == previous state? */
802 	for (i = 1; i <= f->curstat; i++) {
803 		p = f->posns[i];
804 		if ((k = tmpset[0]) != p[0])
805 			goto different;
806 		for (j = 1; j <= k; j++)
807 			if (tmpset[j] != p[j])
808 				goto different;
809 		/* setvec is state i */
810 		f->gototab[s][c] = i;
811 		return i;
812 	  different:;
813 	}
814 
815 	/* add tmpset to current set of states */
816 	if (f->curstat >= NSTATES-1) {
817 		f->curstat = 2;
818 		f->reset = 1;
819 		for (i = 2; i < NSTATES; i++)
820 			xfree(f->posns[i]);
821 	} else
822 		++(f->curstat);
823 	for (i = 0; i < NCHARS; i++)
824 		f->gototab[f->curstat][i] = 0;
825 	xfree(f->posns[f->curstat]);
826 	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
827 		overflo("out of space in cgoto");
828 
829 	f->posns[f->curstat] = p;
830 	f->gototab[s][c] = f->curstat;
831 	for (i = 0; i <= setcnt; i++)
832 		p[i] = tmpset[i];
833 	if (setvec[f->accept])
834 		f->out[f->curstat] = 1;
835 	else
836 		f->out[f->curstat] = 0;
837 	return f->curstat;
838 }
839 
840 
841 void freefa(fa *f)	/* free a finite automaton */
842 {
843 	int i;
844 
845 	if (f == NULL)
846 		return;
847 	for (i = 0; i <= f->curstat; i++)
848 		xfree(f->posns[i]);
849 	for (i = 0; i <= f->accept; i++) {
850 		xfree(f->re[i].lfollow);
851 		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
852 			xfree((f->re[i].lval.np));
853 	}
854 	xfree(f->restr);
855 	xfree(f);
856 }
857