xref: /openbsd-src/usr.bin/awk/b.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: b.c,v 1.9 2001/07/12 05:16:53 deraadt Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 /* lasciate ogne speranza, voi ch'entrate. */
27 
28 #define	DEBUG
29 
30 #include <ctype.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <stdlib.h>
34 #include "awk.h"
35 #include "ytab.h"
36 
37 #define	HAT	(NCHARS-1)	/* matches ^ in regular expr */
38 				/* NCHARS is 2**n */
39 #define MAXLIN 22
40 
41 #define type(v)		(v)->nobj	/* badly overloaded here */
42 #define info(v)		(v)->ntype	/* badly overloaded here */
43 #define left(v)		(v)->narg[0]
44 #define right(v)	(v)->narg[1]
45 #define parent(v)	(v)->nnext
46 
47 #define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48 #define UNARY	case STAR: case PLUS: case QUEST:
49 
50 /* encoding in tree Nodes:
51 	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
52 		left is index, right contains value or pointer to value
53 	unary (STAR, PLUS, QUEST): left is child, right is null
54 	binary (CAT, OR): left and right are children
55 	parent contains pointer to parent
56 */
57 
58 
59 int	*setvec;
60 int	*tmpset;
61 int	maxsetvec = 0;
62 
63 int	rtok;		/* next token in current re */
64 int	rlxval;
65 char	*rlxstr;
66 char	*prestr;	/* current position in current re */
67 char	*lastre;	/* origin of last re */
68 
69 static	int setcnt;
70 static	int poscnt;
71 
72 char	*patbeg;
73 int	patlen;
74 
75 #define	NFA	20	/* cache this many dynamic fa's */
76 fa	*fatab[NFA];
77 int	nfatab	= 0;	/* entries in fatab */
78 
79 fa *makedfa(char *s, int anchor)	/* returns dfa for reg expr s */
80 {
81 	int i, use, nuse;
82 	fa *pfa;
83 	static int now = 1;
84 
85 	if (setvec == 0) {	/* first time through any RE */
86 		maxsetvec = MAXLIN;
87 		setvec = (int *) malloc(maxsetvec * sizeof(int));
88 		tmpset = (int *) malloc(maxsetvec * sizeof(int));
89 		if (setvec == 0 || tmpset == 0)
90 			overflo("out of space initializing makedfa");
91 	}
92 
93 	if (compile_time)	/* a constant for sure */
94 		return mkdfa(s, anchor);
95 	for (i = 0; i < nfatab; i++)	/* is it there already? */
96 		if (fatab[i]->anchor == anchor
97 		  && strcmp(fatab[i]->restr, s) == 0) {
98 			fatab[i]->use = now++;
99 			return fatab[i];
100 	}
101 	pfa = mkdfa(s, anchor);
102 	if (nfatab < NFA) {	/* room for another */
103 		fatab[nfatab] = pfa;
104 		fatab[nfatab]->use = now++;
105 		nfatab++;
106 		return pfa;
107 	}
108 	use = fatab[0]->use;	/* replace least-recently used */
109 	nuse = 0;
110 	for (i = 1; i < nfatab; i++)
111 		if (fatab[i]->use < use) {
112 			use = fatab[i]->use;
113 			nuse = i;
114 		}
115 	freefa(fatab[nuse]);
116 	fatab[nuse] = pfa;
117 	pfa->use = now++;
118 	return pfa;
119 }
120 
121 fa *mkdfa(char *s, int anchor)	/* does the real work of making a dfa */
122 				/* anchor = 1 for anchored matches, else 0 */
123 {
124 	Node *p, *p1;
125 	fa *f;
126 
127 	p = reparse(s);
128 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
129 		/* put ALL STAR in front of reg.  exp. */
130 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
131 		/* put FINAL after reg.  exp. */
132 
133 	poscnt = 0;
134 	penter(p1);	/* enter parent pointers and leaf indices */
135 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
136 		overflo("out of space for fa");
137 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
138 	cfoll(f, p1);	/* set up follow sets */
139 	freetr(p1);
140 	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
141 			overflo("out of space in makedfa");
142 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
143 		overflo("out of space in makedfa");
144 	*f->posns[1] = 0;
145 	f->initstat = makeinit(f, anchor);
146 	f->anchor = anchor;
147 	f->restr = tostring(s);
148 	return f;
149 }
150 
151 int makeinit(fa *f, int anchor)
152 {
153 	int i, k;
154 
155 	f->curstat = 2;
156 	f->out[2] = 0;
157 	f->reset = 0;
158 	k = *(f->re[0].lfollow);
159 	xfree(f->posns[2]);
160 	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
161 		overflo("out of space in makeinit");
162 	for (i=0; i <= k; i++) {
163 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
164 	}
165 	if ((f->posns[2])[1] == f->accept)
166 		f->out[2] = 1;
167 	for (i=0; i < NCHARS; i++)
168 		f->gototab[2][i] = 0;
169 	f->curstat = cgoto(f, 2, HAT);
170 	if (anchor) {
171 		*f->posns[2] = k-1;	/* leave out position 0 */
172 		for (i=0; i < k; i++) {
173 			(f->posns[0])[i] = (f->posns[2])[i];
174 		}
175 
176 		f->out[0] = f->out[2];
177 		if (f->curstat != 2)
178 			--(*f->posns[f->curstat]);
179 	}
180 	return f->curstat;
181 }
182 
183 void penter(Node *p)	/* set up parent pointers and leaf indices */
184 {
185 	switch (type(p)) {
186 	LEAF
187 		info(p) = poscnt;
188 		poscnt++;
189 		break;
190 	UNARY
191 		penter(left(p));
192 		parent(left(p)) = p;
193 		break;
194 	case CAT:
195 	case OR:
196 		penter(left(p));
197 		penter(right(p));
198 		parent(left(p)) = p;
199 		parent(right(p)) = p;
200 		break;
201 	default:	/* can't happen */
202 		FATAL("can't happen: unknown type %d in penter", type(p));
203 		break;
204 	}
205 }
206 
207 void freetr(Node *p)	/* free parse tree */
208 {
209 	switch (type(p)) {
210 	LEAF
211 		xfree(p);
212 		break;
213 	UNARY
214 		freetr(left(p));
215 		xfree(p);
216 		break;
217 	case CAT:
218 	case OR:
219 		freetr(left(p));
220 		freetr(right(p));
221 		xfree(p);
222 		break;
223 	default:	/* can't happen */
224 		FATAL("can't happen: unknown type %d in freetr", type(p));
225 		break;
226 	}
227 }
228 
229 /* in the parsing of regular expressions, metacharacters like . have */
230 /* to be seen literally;  \056 is not a metacharacter. */
231 
232 int hexstr(char **pp)	/* find and eval hex string at pp, return new p */
233 {			/* only pick up one 8-bit byte (2 chars) */
234 	char *p;
235 	int n = 0;
236 	int i;
237 
238 	for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) {
239 		if (isdigit(*p))
240 			n = 16 * n + *p - '0';
241 		else if (*p >= 'a' && *p <= 'f')
242 			n = 16 * n + *p - 'a' + 10;
243 		else if (*p >= 'A' && *p <= 'F')
244 			n = 16 * n + *p - 'A' + 10;
245 	}
246 	*pp = p;
247 	return n;
248 }
249 
250 #define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
251 
252 int quoted(char **pp)	/* pick up next thing after a \\ */
253 			/* and increment *pp */
254 {
255 	char *p = *pp;
256 	int c;
257 
258 	if ((c = *p++) == 't')
259 		c = '\t';
260 	else if (c == 'n')
261 		c = '\n';
262 	else if (c == 'f')
263 		c = '\f';
264 	else if (c == 'r')
265 		c = '\r';
266 	else if (c == 'b')
267 		c = '\b';
268 	else if (c == '\\')
269 		c = '\\';
270 	else if (c == 'x') {	/* hexadecimal goo follows */
271 		c = hexstr(&p);	/* this adds a null if number is invalid */
272 	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
273 		int n = c - '0';
274 		if (isoctdigit(*p)) {
275 			n = 8 * n + *p++ - '0';
276 			if (isoctdigit(*p))
277 				n = 8 * n + *p++ - '0';
278 		}
279 		c = n;
280 	} /* else */
281 		/* c = c; */
282 	*pp = p;
283 	return c;
284 }
285 
286 char *cclenter(char *p)	/* add a character class */
287 {
288 	int i, c, c2;
289 	char *op, *bp;
290 	static char *buf = 0;
291 	static int bufsz = 100;
292 
293 	op = p;
294 	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
295 		FATAL("out of space for character class [%.10s...] 1", p);
296 	bp = buf;
297 	for (i = 0; (c = *p++) != 0; ) {
298 		if (c == '\\') {
299 			c = quoted(&p);
300 		} else if (c == '-' && i > 0 && bp[-1] != 0) {
301 			if (*p != 0) {
302 				c = bp[-1];
303 				c2 = *p++;
304 				if (c2 == '\\')
305 					c2 = quoted(&p);
306 				if (c > c2) {	/* empty; ignore */
307 					bp--;
308 					i--;
309 					continue;
310 				}
311 				while (c < c2) {
312 					if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, 0))
313 						FATAL("out of space for character class [%.10s...] 2", p);
314 					*bp++ = ++c;
315 					i++;
316 				}
317 				continue;
318 			}
319 		}
320 		if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, 0))
321 			FATAL("out of space for character class [%.10s...] 3", p);
322 		*bp++ = c;
323 		i++;
324 	}
325 	*bp = 0;
326 	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
327 	xfree(op);
328 	return(tostring(buf));
329 }
330 
331 void overflo(char *s)
332 {
333 	FATAL("regular expression too big: %.30s...", s);
334 }
335 
336 void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
337 {
338 	int i;
339 	int *p;
340 
341 	switch (type(v)) {
342 	LEAF
343 		f->re[info(v)].ltype = type(v);
344 		f->re[info(v)].lval.np = right(v);
345 		while (f->accept >= maxsetvec) {	/* guessing here! */
346 			maxsetvec *= 4;
347 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
348 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
349 			if (setvec == 0 || tmpset == 0)
350 				overflo("out of space in cfoll()");
351 		}
352 		for (i = 0; i <= f->accept; i++)
353 			setvec[i] = 0;
354 		setcnt = 0;
355 		follow(v);	/* computes setvec and setcnt */
356 		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
357 			overflo("out of space building follow set");
358 		f->re[info(v)].lfollow = p;
359 		*p = setcnt;
360 		for (i = f->accept; i >= 0; i--)
361 			if (setvec[i] == 1)
362 				*++p = i;
363 		break;
364 	UNARY
365 		cfoll(f,left(v));
366 		break;
367 	case CAT:
368 	case OR:
369 		cfoll(f,left(v));
370 		cfoll(f,right(v));
371 		break;
372 	default:	/* can't happen */
373 		FATAL("can't happen: unknown type %d in cfoll", type(v));
374 	}
375 }
376 
377 int first(Node *p)	/* collects initially active leaves of p into setvec */
378 			/* returns 1 if p matches empty string */
379 {
380 	int b, lp;
381 
382 	switch (type(p)) {
383 	LEAF
384 		lp = info(p);	/* look for high-water mark of subscripts */
385 		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
386 			maxsetvec *= 4;
387 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
388 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
389 			if (setvec == 0 || tmpset == 0)
390 				overflo("out of space in first()");
391 		}
392 		if (setvec[lp] != 1) {
393 			setvec[lp] = 1;
394 			setcnt++;
395 		}
396 		if (type(p) == CCL && (*(char *) right(p)) == '\0')
397 			return(0);		/* empty CCL */
398 		else return(1);
399 	case PLUS:
400 		if (first(left(p)) == 0) return(0);
401 		return(1);
402 	case STAR:
403 	case QUEST:
404 		first(left(p));
405 		return(0);
406 	case CAT:
407 		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
408 		return(1);
409 	case OR:
410 		b = first(right(p));
411 		if (first(left(p)) == 0 || b == 0) return(0);
412 		return(1);
413 	}
414 	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
415 	return(-1);
416 }
417 
418 void follow(Node *v)	/* collects leaves that can follow v into setvec */
419 {
420 	Node *p;
421 
422 	if (type(v) == FINAL)
423 		return;
424 	p = parent(v);
425 	switch (type(p)) {
426 	case STAR:
427 	case PLUS:
428 		first(v);
429 		follow(p);
430 		return;
431 
432 	case OR:
433 	case QUEST:
434 		follow(p);
435 		return;
436 
437 	case CAT:
438 		if (v == left(p)) {	/* v is left child of p */
439 			if (first(right(p)) == 0) {
440 				follow(p);
441 				return;
442 			}
443 		} else		/* v is right child */
444 			follow(p);
445 		return;
446 	}
447 }
448 
449 int member(int c, char *s)	/* is c in s? */
450 {
451 	while (*s)
452 		if (c == *s++)
453 			return(1);
454 	return(0);
455 }
456 
457 int match(fa *f, char *p0)	/* shortest match ? */
458 {
459 	int s, ns;
460 	uschar *p = (uschar *) p0;
461 
462 	s = f->reset ? makeinit(f,0) : f->initstat;
463 	if (f->out[s])
464 		return(1);
465 	do {
466 		if ((ns = f->gototab[s][*p]) != 0)
467 			s = ns;
468 		else
469 			s = cgoto(f, s, *p);
470 		if (f->out[s])
471 			return(1);
472 	} while (*p++ != 0);
473 	return(0);
474 }
475 
476 int pmatch(fa *f, char *p0)	/* longest match, for sub */
477 {
478 	int s, ns;
479 	uschar *p = (uschar *) p0;
480 	uschar *q;
481 	int i, k;
482 
483 	s = f->reset ? makeinit(f,1) : f->initstat;
484 	patbeg = (char *) p;
485 	patlen = -1;
486 	do {
487 		q = p;
488 		do {
489 			if (f->out[s])		/* final state */
490 				patlen = q-p;
491 			if ((ns = f->gototab[s][*q]) != 0)
492 				s = ns;
493 			else
494 				s = cgoto(f, s, *q);
495 			if (s == 1) {	/* no transition */
496 				if (patlen >= 0) {
497 					patbeg = (char *) p;
498 					return(1);
499 				}
500 				else
501 					goto nextin;	/* no match */
502 			}
503 		} while (*q++ != 0);
504 		if (f->out[s])
505 			patlen = q-p-1;	/* don't count $ */
506 		if (patlen >= 0) {
507 			patbeg = (char *) p;
508 			return(1);
509 		}
510 	nextin:
511 		s = 2;
512 		if (f->reset) {
513 			for (i = 2; i <= f->curstat; i++)
514 				xfree(f->posns[i]);
515 			k = *f->posns[0];
516 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
517 				overflo("out of space in pmatch");
518 			for (i = 0; i <= k; i++)
519 				(f->posns[2])[i] = (f->posns[0])[i];
520 			f->initstat = f->curstat = 2;
521 			f->out[2] = f->out[0];
522 			for (i = 0; i < NCHARS; i++)
523 				f->gototab[2][i] = 0;
524 		}
525 	} while (*p++ != 0);
526 	return (0);
527 }
528 
529 int nematch(fa *f, char *p0)	/* non-empty match, for sub */
530 {
531 	int s, ns;
532 	uschar *p = (uschar *) p0;
533 	uschar *q;
534 	int i, k;
535 
536 	s = f->reset ? makeinit(f,1) : f->initstat;
537 	patlen = -1;
538 	while (*p) {
539 		q = p;
540 		do {
541 			if (f->out[s])		/* final state */
542 				patlen = q-p;
543 			if ((ns = f->gototab[s][*q]) != 0)
544 				s = ns;
545 			else
546 				s = cgoto(f, s, *q);
547 			if (s == 1) {	/* no transition */
548 				if (patlen > 0) {
549 					patbeg = (char *) p;
550 					return(1);
551 				} else
552 					goto nnextin;	/* no nonempty match */
553 			}
554 		} while (*q++ != 0);
555 		if (f->out[s])
556 			patlen = q-p-1;	/* don't count $ */
557 		if (patlen > 0 ) {
558 			patbeg = (char *) p;
559 			return(1);
560 		}
561 	nnextin:
562 		s = 2;
563 		if (f->reset) {
564 			for (i = 2; i <= f->curstat; i++)
565 				xfree(f->posns[i]);
566 			k = *f->posns[0];
567 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
568 				overflo("out of state space");
569 			for (i = 0; i <= k; i++)
570 				(f->posns[2])[i] = (f->posns[0])[i];
571 			f->initstat = f->curstat = 2;
572 			f->out[2] = f->out[0];
573 			for (i = 0; i < NCHARS; i++)
574 				f->gototab[2][i] = 0;
575 		}
576 		p++;
577 	}
578 	return (0);
579 }
580 
581 Node *reparse(char *p)	/* parses regular expression pointed to by p */
582 {			/* uses relex() to scan regular expression */
583 	Node *np;
584 
585 	dprintf( ("reparse <%s>\n", p) );
586 	lastre = prestr = p;	/* prestr points to string to be parsed */
587 	rtok = relex();
588 	if (rtok == '\0')
589 		FATAL("empty regular expression");
590 	np = regexp();
591 	if (rtok != '\0')
592 		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
593 	return(np);
594 }
595 
596 Node *regexp(void)	/* top-level parse of reg expr */
597 {
598 	return (alt(concat(primary())));
599 }
600 
601 Node *primary(void)
602 {
603 	Node *np;
604 
605 	switch (rtok) {
606 	case CHAR:
607 		np = op2(CHAR, NIL, itonp(rlxval));
608 		rtok = relex();
609 		return (unary(np));
610 	case ALL:
611 		rtok = relex();
612 		return (unary(op2(ALL, NIL, NIL)));
613 	case DOT:
614 		rtok = relex();
615 		return (unary(op2(DOT, NIL, NIL)));
616 	case CCL:
617 		np = op2(CCL, NIL, (Node*) cclenter(rlxstr));
618 		rtok = relex();
619 		return (unary(np));
620 	case NCCL:
621 		np = op2(NCCL, NIL, (Node *) cclenter(rlxstr));
622 		rtok = relex();
623 		return (unary(np));
624 	case '^':
625 		rtok = relex();
626 		return (unary(op2(CHAR, NIL, itonp(HAT))));
627 	case '$':
628 		rtok = relex();
629 		return (unary(op2(CHAR, NIL, NIL)));
630 	case '(':
631 		rtok = relex();
632 		if (rtok == ')') {	/* special pleading for () */
633 			rtok = relex();
634 			return unary(op2(CCL, NIL, (Node *) tostring("")));
635 		}
636 		np = regexp();
637 		if (rtok == ')') {
638 			rtok = relex();
639 			return (unary(np));
640 		}
641 		else
642 			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
643 	default:
644 		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
645 	}
646 	return 0;	/*NOTREACHED*/
647 }
648 
649 Node *concat(Node *np)
650 {
651 	switch (rtok) {
652 	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
653 		return (concat(op2(CAT, np, primary())));
654 	}
655 	return (np);
656 }
657 
658 Node *alt(Node *np)
659 {
660 	if (rtok == OR) {
661 		rtok = relex();
662 		return (alt(op2(OR, np, concat(primary()))));
663 	}
664 	return (np);
665 }
666 
667 Node *unary(Node *np)
668 {
669 	switch (rtok) {
670 	case STAR:
671 		rtok = relex();
672 		return (unary(op2(STAR, np, NIL)));
673 	case PLUS:
674 		rtok = relex();
675 		return (unary(op2(PLUS, np, NIL)));
676 	case QUEST:
677 		rtok = relex();
678 		return (unary(op2(QUEST, np, NIL)));
679 	default:
680 		return (np);
681 	}
682 }
683 
684 int relex(void)		/* lexical analyzer for reparse */
685 {
686 	int c, n;
687 	int cflag;
688 	static char *buf = 0;
689 	static int bufsz = 100;
690 	char *bp;
691 
692 	switch (c = *prestr++) {
693 	case '|': return OR;
694 	case '*': return STAR;
695 	case '+': return PLUS;
696 	case '?': return QUEST;
697 	case '.': return DOT;
698 	case '\0': prestr--; return '\0';
699 	case '^':
700 	case '$':
701 	case '(':
702 	case ')':
703 		return c;
704 	case '\\':
705 		rlxval = quoted(&prestr);
706 		return CHAR;
707 	default:
708 		rlxval = c;
709 		return CHAR;
710 	case '[':
711 		if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
712 			FATAL("out of space in reg expr %.10s..", lastre);
713 		bp = buf;
714 		if (*prestr == '^') {
715 			cflag = 1;
716 			prestr++;
717 		}
718 		else
719 			cflag = 0;
720 		n = 2 * strlen(prestr)+1;
721 		if (!adjbuf(&buf, &bufsz, n, n, &bp, 0))
722 			FATAL("out of space for reg expr %.10s...", lastre);
723 		for (; ; ) {
724 			if ((c = *prestr++) == '\\') {
725 				*bp++ = '\\';
726 				if ((c = *prestr++) == '\0')
727 					FATAL("nonterminated character class %.20s...", lastre);
728 				*bp++ = c;
729 			} else if (c == '\n') {
730 				FATAL("newline in character class %.20s...", lastre);
731 			} else if (c == '\0') {
732 				FATAL("nonterminated character class %.20s", lastre);
733 			} else if (bp == buf) {	/* 1st char is special */
734 				*bp++ = c;
735 			} else if (c == ']') {
736 				*bp++ = 0;
737 				rlxstr = tostring(buf);
738 				if (cflag == 0)
739 					return CCL;
740 				else
741 					return NCCL;
742 			} else
743 				*bp++ = c;
744 		}
745 	}
746 }
747 
748 int cgoto(fa *f, int s, int c)
749 {
750 	int i, j, k;
751 	int *p, *q;
752 
753 	if (c < 0)
754 		FATAL("can't happen: neg char %d in cgoto", c);
755 	while (f->accept >= maxsetvec) {	/* guessing here! */
756 		maxsetvec *= 4;
757 		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
758 		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
759 		if (setvec == 0 || tmpset == 0)
760 			overflo("out of space in cgoto()");
761 	}
762 	for (i = 0; i <= f->accept; i++)
763 		setvec[i] = 0;
764 	setcnt = 0;
765 	/* compute positions of gototab[s,c] into setvec */
766 	p = f->posns[s];
767 	for (i = 1; i <= *p; i++) {
768 		if ((k = f->re[p[i]].ltype) != FINAL) {
769 			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
770 			 || (k == DOT && c != 0 && c != HAT)
771 			 || (k == ALL && c != 0)
772 			 || (k == CCL && member(c, f->re[p[i]].lval.up))
773 			 || (k == NCCL && !member(c, f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
774 				q = f->re[p[i]].lfollow;
775 				for (j = 1; j <= *q; j++) {
776 					if (q[j] >= maxsetvec) {
777 						maxsetvec *= 4;
778 						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
779 						tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
780 						if (setvec == 0 || tmpset == 0)
781 							overflo("cgoto overflow");
782 					}
783 					if (setvec[q[j]] == 0) {
784 						setcnt++;
785 						setvec[q[j]] = 1;
786 					}
787 				}
788 			}
789 		}
790 	}
791 	/* determine if setvec is a previous state */
792 	tmpset[0] = setcnt;
793 	j = 1;
794 	for (i = f->accept; i >= 0; i--)
795 		if (setvec[i]) {
796 			tmpset[j++] = i;
797 		}
798 	/* tmpset == previous state? */
799 	for (i = 1; i <= f->curstat; i++) {
800 		p = f->posns[i];
801 		if ((k = tmpset[0]) != p[0])
802 			goto different;
803 		for (j = 1; j <= k; j++)
804 			if (tmpset[j] != p[j])
805 				goto different;
806 		/* setvec is state i */
807 		f->gototab[s][c] = i;
808 		return i;
809 	  different:;
810 	}
811 
812 	/* add tmpset to current set of states */
813 	if (f->curstat >= NSTATES-1) {
814 		f->curstat = 2;
815 		f->reset = 1;
816 		for (i = 2; i < NSTATES; i++)
817 			xfree(f->posns[i]);
818 	} else
819 		++(f->curstat);
820 	for (i = 0; i < NCHARS; i++)
821 		f->gototab[f->curstat][i] = 0;
822 	xfree(f->posns[f->curstat]);
823 	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
824 		overflo("out of space in cgoto");
825 
826 	f->posns[f->curstat] = p;
827 	f->gototab[s][c] = f->curstat;
828 	for (i = 0; i <= setcnt; i++)
829 		p[i] = tmpset[i];
830 	if (setvec[f->accept])
831 		f->out[f->curstat] = 1;
832 	else
833 		f->out[f->curstat] = 0;
834 	return f->curstat;
835 }
836 
837 
838 void freefa(fa *f)	/* free a finite automaton */
839 {
840 	int i;
841 
842 	if (f == NULL)
843 		return;
844 	for (i = 0; i <= f->curstat; i++)
845 		xfree(f->posns[i]);
846 	for (i = 0; i <= f->accept; i++) {
847 		xfree(f->re[i].lfollow);
848 		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
849 			xfree((f->re[i].lval.np));
850 	}
851 	xfree(f->restr);
852 	xfree(f);
853 }
854