xref: /openbsd-src/usr.bin/awk/b.c (revision db3296cf5c1dd9058ceecc3a29fe4aaa0bd26000)
1 /*	$OpenBSD: b.c,v 1.11 2002/12/19 21:24:28 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 /* lasciate ogne speranza, voi ch'entrate. */
27 
28 #define	DEBUG
29 
30 #include <ctype.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <stdlib.h>
34 #include "awk.h"
35 #include "ytab.h"
36 
37 #define	HAT	(NCHARS-2)	/* matches ^ in regular expr */
38 				/* NCHARS is 2**n */
39 #define MAXLIN 22
40 
41 #define type(v)		(v)->nobj	/* badly overloaded here */
42 #define info(v)		(v)->ntype	/* badly overloaded here */
43 #define left(v)		(v)->narg[0]
44 #define right(v)	(v)->narg[1]
45 #define parent(v)	(v)->nnext
46 
47 #define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48 #define UNARY	case STAR: case PLUS: case QUEST:
49 
50 /* encoding in tree Nodes:
51 	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
52 		left is index, right contains value or pointer to value
53 	unary (STAR, PLUS, QUEST): left is child, right is null
54 	binary (CAT, OR): left and right are children
55 	parent contains pointer to parent
56 */
57 
58 
59 int	*setvec;
60 int	*tmpset;
61 int	maxsetvec = 0;
62 
63 int	rtok;		/* next token in current re */
64 int	rlxval;
65 static uschar	*rlxstr;
66 static uschar	*prestr;	/* current position in current re */
67 static uschar	*lastre;	/* origin of last re */
68 
69 static	int setcnt;
70 static	int poscnt;
71 
72 char	*patbeg;
73 int	patlen;
74 
75 #define	NFA	20	/* cache this many dynamic fa's */
76 fa	*fatab[NFA];
77 int	nfatab	= 0;	/* entries in fatab */
78 
79 fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
80 {
81 	int i, use, nuse;
82 	fa *pfa;
83 	static int now = 1;
84 
85 	if (setvec == 0) {	/* first time through any RE */
86 		maxsetvec = MAXLIN;
87 		setvec = (int *) malloc(maxsetvec * sizeof(int));
88 		tmpset = (int *) malloc(maxsetvec * sizeof(int));
89 		if (setvec == 0 || tmpset == 0)
90 			overflo("out of space initializing makedfa");
91 	}
92 
93 	if (compile_time)	/* a constant for sure */
94 		return mkdfa(s, anchor);
95 	for (i = 0; i < nfatab; i++)	/* is it there already? */
96 		if (fatab[i]->anchor == anchor
97 		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
98 			fatab[i]->use = now++;
99 			return fatab[i];
100 		}
101 	pfa = mkdfa(s, anchor);
102 	if (nfatab < NFA) {	/* room for another */
103 		fatab[nfatab] = pfa;
104 		fatab[nfatab]->use = now++;
105 		nfatab++;
106 		return pfa;
107 	}
108 	use = fatab[0]->use;	/* replace least-recently used */
109 	nuse = 0;
110 	for (i = 1; i < nfatab; i++)
111 		if (fatab[i]->use < use) {
112 			use = fatab[i]->use;
113 			nuse = i;
114 		}
115 	freefa(fatab[nuse]);
116 	fatab[nuse] = pfa;
117 	pfa->use = now++;
118 	return pfa;
119 }
120 
121 fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
122 				/* anchor = 1 for anchored matches, else 0 */
123 {
124 	Node *p, *p1;
125 	fa *f;
126 
127 	p = reparse(s);
128 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
129 		/* put ALL STAR in front of reg.  exp. */
130 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
131 		/* put FINAL after reg.  exp. */
132 
133 	poscnt = 0;
134 	penter(p1);	/* enter parent pointers and leaf indices */
135 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
136 		overflo("out of space for fa");
137 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
138 	cfoll(f, p1);	/* set up follow sets */
139 	freetr(p1);
140 	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
141 			overflo("out of space in makedfa");
142 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
143 		overflo("out of space in makedfa");
144 	*f->posns[1] = 0;
145 	f->initstat = makeinit(f, anchor);
146 	f->anchor = anchor;
147 	f->restr = (uschar *) tostring(s);
148 	return f;
149 }
150 
151 int makeinit(fa *f, int anchor)
152 {
153 	int i, k;
154 
155 	f->curstat = 2;
156 	f->out[2] = 0;
157 	f->reset = 0;
158 	k = *(f->re[0].lfollow);
159 	xfree(f->posns[2]);
160 	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
161 		overflo("out of space in makeinit");
162 	for (i=0; i <= k; i++) {
163 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
164 	}
165 	if ((f->posns[2])[1] == f->accept)
166 		f->out[2] = 1;
167 	for (i=0; i < NCHARS; i++)
168 		f->gototab[2][i] = 0;
169 	f->curstat = cgoto(f, 2, HAT);
170 	if (anchor) {
171 		*f->posns[2] = k-1;	/* leave out position 0 */
172 		for (i=0; i < k; i++) {
173 			(f->posns[0])[i] = (f->posns[2])[i];
174 		}
175 
176 		f->out[0] = f->out[2];
177 		if (f->curstat != 2)
178 			--(*f->posns[f->curstat]);
179 	}
180 	return f->curstat;
181 }
182 
183 void penter(Node *p)	/* set up parent pointers and leaf indices */
184 {
185 	switch (type(p)) {
186 	LEAF
187 		info(p) = poscnt;
188 		poscnt++;
189 		break;
190 	UNARY
191 		penter(left(p));
192 		parent(left(p)) = p;
193 		break;
194 	case CAT:
195 	case OR:
196 		penter(left(p));
197 		penter(right(p));
198 		parent(left(p)) = p;
199 		parent(right(p)) = p;
200 		break;
201 	default:	/* can't happen */
202 		FATAL("can't happen: unknown type %d in penter", type(p));
203 		break;
204 	}
205 }
206 
207 void freetr(Node *p)	/* free parse tree */
208 {
209 	switch (type(p)) {
210 	LEAF
211 		xfree(p);
212 		break;
213 	UNARY
214 		freetr(left(p));
215 		xfree(p);
216 		break;
217 	case CAT:
218 	case OR:
219 		freetr(left(p));
220 		freetr(right(p));
221 		xfree(p);
222 		break;
223 	default:	/* can't happen */
224 		FATAL("can't happen: unknown type %d in freetr", type(p));
225 		break;
226 	}
227 }
228 
229 /* in the parsing of regular expressions, metacharacters like . have */
230 /* to be seen literally;  \056 is not a metacharacter. */
231 
232 int hexstr(char **pp)	/* find and eval hex string at pp, return new p */
233 {			/* only pick up one 8-bit byte (2 chars) */
234 	uschar *p;
235 	int n = 0;
236 	int i;
237 
238 	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
239 		if (isdigit(*p))
240 			n = 16 * n + *p - '0';
241 		else if (*p >= 'a' && *p <= 'f')
242 			n = 16 * n + *p - 'a' + 10;
243 		else if (*p >= 'A' && *p <= 'F')
244 			n = 16 * n + *p - 'A' + 10;
245 	}
246 	*pp = (char *) p;
247 	return n;
248 }
249 
250 #define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
251 
252 int quoted(char **pp)	/* pick up next thing after a \\ */
253 			/* and increment *pp */
254 {
255 	char *p = *pp;
256 	int c;
257 
258 	if ((c = *p++) == 't')
259 		c = '\t';
260 	else if (c == 'n')
261 		c = '\n';
262 	else if (c == 'f')
263 		c = '\f';
264 	else if (c == 'r')
265 		c = '\r';
266 	else if (c == 'b')
267 		c = '\b';
268 	else if (c == '\\')
269 		c = '\\';
270 	else if (c == 'x') {	/* hexadecimal goo follows */
271 		c = hexstr(&p);	/* this adds a null if number is invalid */
272 	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
273 		int n = c - '0';
274 		if (isoctdigit(*p)) {
275 			n = 8 * n + *p++ - '0';
276 			if (isoctdigit(*p))
277 				n = 8 * n + *p++ - '0';
278 		}
279 		c = n;
280 	} /* else */
281 		/* c = c; */
282 	*pp = p;
283 	return c;
284 }
285 
286 char *cclenter(const char *argp)	/* add a character class */
287 {
288 	int i, c, c2;
289 	uschar *p = (uschar *) argp;
290 	uschar *op, *bp;
291 	static uschar *buf = 0;
292 	static int bufsz = 100;
293 
294 	op = p;
295 	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
296 		FATAL("out of space for character class [%.10s...] 1", p);
297 	bp = buf;
298 	for (i = 0; (c = *p++) != 0; ) {
299 		if (c == '\\') {
300 			c = quoted((char **) &p);
301 		} else if (c == '-' && i > 0 && bp[-1] != 0) {
302 			if (*p != 0) {
303 				c = bp[-1];
304 				c2 = *p++;
305 				if (c2 == '\\')
306 					c2 = quoted((char **) &p);
307 				if (c > c2) {	/* empty; ignore */
308 					bp--;
309 					i--;
310 					continue;
311 				}
312 				while (c < c2) {
313 					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
314 						FATAL("out of space for character class [%.10s...] 2", p);
315 					*bp++ = ++c;
316 					i++;
317 				}
318 				continue;
319 			}
320 		}
321 		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
322 			FATAL("out of space for character class [%.10s...] 3", p);
323 		*bp++ = c;
324 		i++;
325 	}
326 	*bp = 0;
327 	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
328 	xfree(op);
329 	return (char *) tostring((char *) buf);
330 }
331 
332 void overflo(const char *s)
333 {
334 	FATAL("regular expression too big: %.30s...", s);
335 }
336 
337 void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
338 {
339 	int i;
340 	int *p;
341 
342 	switch (type(v)) {
343 	LEAF
344 		f->re[info(v)].ltype = type(v);
345 		f->re[info(v)].lval.np = right(v);
346 		while (f->accept >= maxsetvec) {	/* guessing here! */
347 			maxsetvec *= 4;
348 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
349 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
350 			if (setvec == 0 || tmpset == 0)
351 				overflo("out of space in cfoll()");
352 		}
353 		for (i = 0; i <= f->accept; i++)
354 			setvec[i] = 0;
355 		setcnt = 0;
356 		follow(v);	/* computes setvec and setcnt */
357 		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
358 			overflo("out of space building follow set");
359 		f->re[info(v)].lfollow = p;
360 		*p = setcnt;
361 		for (i = f->accept; i >= 0; i--)
362 			if (setvec[i] == 1)
363 				*++p = i;
364 		break;
365 	UNARY
366 		cfoll(f,left(v));
367 		break;
368 	case CAT:
369 	case OR:
370 		cfoll(f,left(v));
371 		cfoll(f,right(v));
372 		break;
373 	default:	/* can't happen */
374 		FATAL("can't happen: unknown type %d in cfoll", type(v));
375 	}
376 }
377 
378 int first(Node *p)	/* collects initially active leaves of p into setvec */
379 			/* returns 1 if p matches empty string */
380 {
381 	int b, lp;
382 
383 	switch (type(p)) {
384 	LEAF
385 		lp = info(p);	/* look for high-water mark of subscripts */
386 		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
387 			maxsetvec *= 4;
388 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
389 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
390 			if (setvec == 0 || tmpset == 0)
391 				overflo("out of space in first()");
392 		}
393 		if (setvec[lp] != 1) {
394 			setvec[lp] = 1;
395 			setcnt++;
396 		}
397 		if (type(p) == CCL && (*(char *) right(p)) == '\0')
398 			return(0);		/* empty CCL */
399 		else return(1);
400 	case PLUS:
401 		if (first(left(p)) == 0) return(0);
402 		return(1);
403 	case STAR:
404 	case QUEST:
405 		first(left(p));
406 		return(0);
407 	case CAT:
408 		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
409 		return(1);
410 	case OR:
411 		b = first(right(p));
412 		if (first(left(p)) == 0 || b == 0) return(0);
413 		return(1);
414 	}
415 	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
416 	return(-1);
417 }
418 
419 void follow(Node *v)	/* collects leaves that can follow v into setvec */
420 {
421 	Node *p;
422 
423 	if (type(v) == FINAL)
424 		return;
425 	p = parent(v);
426 	switch (type(p)) {
427 	case STAR:
428 	case PLUS:
429 		first(v);
430 		follow(p);
431 		return;
432 
433 	case OR:
434 	case QUEST:
435 		follow(p);
436 		return;
437 
438 	case CAT:
439 		if (v == left(p)) {	/* v is left child of p */
440 			if (first(right(p)) == 0) {
441 				follow(p);
442 				return;
443 			}
444 		} else		/* v is right child */
445 			follow(p);
446 		return;
447 	}
448 }
449 
450 int member(int c, const char *sarg)	/* is c in s? */
451 {
452 	uschar *s = (uschar *) sarg;
453 
454 	while (*s)
455 		if (c == *s++)
456 			return(1);
457 	return(0);
458 }
459 
460 int match(fa *f, const char *p0)	/* shortest match ? */
461 {
462 	int s, ns;
463 	uschar *p = (uschar *) p0;
464 
465 	s = f->reset ? makeinit(f,0) : f->initstat;
466 	if (f->out[s])
467 		return(1);
468 	do {
469 		if ((ns = f->gototab[s][*p]) != 0)
470 			s = ns;
471 		else
472 			s = cgoto(f, s, *p);
473 		if (f->out[s])
474 			return(1);
475 	} while (*p++ != 0);
476 	return(0);
477 }
478 
479 int pmatch(fa *f, const char *p0)	/* longest match, for sub */
480 {
481 	int s, ns;
482 	uschar *p = (uschar *) p0;
483 	uschar *q;
484 	int i, k;
485 
486 	s = f->reset ? makeinit(f,1) : f->initstat;
487 	patbeg = (char *) p;
488 	patlen = -1;
489 	do {
490 		q = p;
491 		do {
492 			if (f->out[s])		/* final state */
493 				patlen = q-p;
494 			if ((ns = f->gototab[s][*q]) != 0)
495 				s = ns;
496 			else
497 				s = cgoto(f, s, *q);
498 			if (s == 1) {	/* no transition */
499 				if (patlen >= 0) {
500 					patbeg = (char *) p;
501 					return(1);
502 				}
503 				else
504 					goto nextin;	/* no match */
505 			}
506 		} while (*q++ != 0);
507 		if (f->out[s])
508 			patlen = q-p-1;	/* don't count $ */
509 		if (patlen >= 0) {
510 			patbeg = (char *) p;
511 			return(1);
512 		}
513 	nextin:
514 		s = 2;
515 		if (f->reset) {
516 			for (i = 2; i <= f->curstat; i++)
517 				xfree(f->posns[i]);
518 			k = *f->posns[0];
519 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
520 				overflo("out of space in pmatch");
521 			for (i = 0; i <= k; i++)
522 				(f->posns[2])[i] = (f->posns[0])[i];
523 			f->initstat = f->curstat = 2;
524 			f->out[2] = f->out[0];
525 			for (i = 0; i < NCHARS; i++)
526 				f->gototab[2][i] = 0;
527 		}
528 	} while (*p++ != 0);
529 	return (0);
530 }
531 
532 int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
533 {
534 	int s, ns;
535 	uschar *p = (uschar *) p0;
536 	uschar *q;
537 	int i, k;
538 
539 	s = f->reset ? makeinit(f,1) : f->initstat;
540 	patlen = -1;
541 	while (*p) {
542 		q = p;
543 		do {
544 			if (f->out[s])		/* final state */
545 				patlen = q-p;
546 			if ((ns = f->gototab[s][*q]) != 0)
547 				s = ns;
548 			else
549 				s = cgoto(f, s, *q);
550 			if (s == 1) {	/* no transition */
551 				if (patlen > 0) {
552 					patbeg = (char *) p;
553 					return(1);
554 				} else
555 					goto nnextin;	/* no nonempty match */
556 			}
557 		} while (*q++ != 0);
558 		if (f->out[s])
559 			patlen = q-p-1;	/* don't count $ */
560 		if (patlen > 0 ) {
561 			patbeg = (char *) p;
562 			return(1);
563 		}
564 	nnextin:
565 		s = 2;
566 		if (f->reset) {
567 			for (i = 2; i <= f->curstat; i++)
568 				xfree(f->posns[i]);
569 			k = *f->posns[0];
570 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
571 				overflo("out of state space");
572 			for (i = 0; i <= k; i++)
573 				(f->posns[2])[i] = (f->posns[0])[i];
574 			f->initstat = f->curstat = 2;
575 			f->out[2] = f->out[0];
576 			for (i = 0; i < NCHARS; i++)
577 				f->gototab[2][i] = 0;
578 		}
579 		p++;
580 	}
581 	return (0);
582 }
583 
584 Node *reparse(const char *p)	/* parses regular expression pointed to by p */
585 {			/* uses relex() to scan regular expression */
586 	Node *np;
587 
588 	dprintf( ("reparse <%s>\n", p) );
589 	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
590 	rtok = relex();
591 	/* GNU compatibility: an empty regexp matches anything */
592 	if (rtok == '\0')
593 		/* FATAL("empty regular expression"); previous */
594 		return(op2(ALL, NIL, NIL));
595 	np = regexp();
596 	if (rtok != '\0')
597 		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
598 	return(np);
599 }
600 
601 Node *regexp(void)	/* top-level parse of reg expr */
602 {
603 	return (alt(concat(primary())));
604 }
605 
606 Node *primary(void)
607 {
608 	Node *np;
609 
610 	switch (rtok) {
611 	case CHAR:
612 		np = op2(CHAR, NIL, itonp(rlxval));
613 		rtok = relex();
614 		return (unary(np));
615 	case ALL:
616 		rtok = relex();
617 		return (unary(op2(ALL, NIL, NIL)));
618 	case DOT:
619 		rtok = relex();
620 		return (unary(op2(DOT, NIL, NIL)));
621 	case CCL:
622 		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
623 		rtok = relex();
624 		return (unary(np));
625 	case NCCL:
626 		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
627 		rtok = relex();
628 		return (unary(np));
629 	case '^':
630 		rtok = relex();
631 		return (unary(op2(CHAR, NIL, itonp(HAT))));
632 	case '$':
633 		rtok = relex();
634 		return (unary(op2(CHAR, NIL, NIL)));
635 	case '(':
636 		rtok = relex();
637 		if (rtok == ')') {	/* special pleading for () */
638 			rtok = relex();
639 			return unary(op2(CCL, NIL, (Node *) tostring("")));
640 		}
641 		np = regexp();
642 		if (rtok == ')') {
643 			rtok = relex();
644 			return (unary(np));
645 		}
646 		else
647 			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
648 	default:
649 		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
650 	}
651 	return 0;	/*NOTREACHED*/
652 }
653 
654 Node *concat(Node *np)
655 {
656 	switch (rtok) {
657 	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
658 		return (concat(op2(CAT, np, primary())));
659 	}
660 	return (np);
661 }
662 
663 Node *alt(Node *np)
664 {
665 	if (rtok == OR) {
666 		rtok = relex();
667 		return (alt(op2(OR, np, concat(primary()))));
668 	}
669 	return (np);
670 }
671 
672 Node *unary(Node *np)
673 {
674 	switch (rtok) {
675 	case STAR:
676 		rtok = relex();
677 		return (unary(op2(STAR, np, NIL)));
678 	case PLUS:
679 		rtok = relex();
680 		return (unary(op2(PLUS, np, NIL)));
681 	case QUEST:
682 		rtok = relex();
683 		return (unary(op2(QUEST, np, NIL)));
684 	default:
685 		return (np);
686 	}
687 }
688 
689 /*
690  * Character class definitions conformant to the POSIX locale as
691  * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
692  * and operating character sets are both ASCII (ISO646) or supersets
693  * thereof.
694  *
695  * Note that to avoid overflowing the temporary buffer used in
696  * relex(), the expanded character class (prior to range expansion)
697  * must be less than twice the size of their full name.
698  */
699 struct charclass {
700 	const char *cc_name;
701 	int cc_namelen;
702 	const char *cc_expand;
703 } charclasses[] = {
704 	{ "alnum",	5,	"0-9A-Za-z" },
705 	{ "alpha",	5,	"A-Za-z" },
706 	{ "blank",	5,	" \t" },
707 	{ "cntrl",	5,	"\000-\037\177" },
708 	{ "digit",	5,	"0-9" },
709 	{ "graph",	5,	"\041-\176" },
710 	{ "lower",	5,	"a-z" },
711 	{ "print",	5,	" \041-\176" },
712 	{ "punct",	5,	"\041-\057\072-\100\133-\140\173-\176" },
713 	{ "space",	5,	" \f\n\r\t\v" },
714 	{ "upper",	5,	"A-Z" },
715 	{ "xdigit",	6,	"0-9A-Fa-f" },
716 	{ NULL,		0,	NULL },
717 };
718 
719 
720 int relex(void)		/* lexical analyzer for reparse */
721 {
722 	int c, n;
723 	int cflag;
724 	static uschar *buf = 0;
725 	static int bufsz = 100;
726 	uschar *bp;
727 	struct charclass *cc;
728 	const uschar *p;
729 
730 	switch (c = *prestr++) {
731 	case '|': return OR;
732 	case '*': return STAR;
733 	case '+': return PLUS;
734 	case '?': return QUEST;
735 	case '.': return DOT;
736 	case '\0': prestr--; return '\0';
737 	case '^':
738 	case '$':
739 	case '(':
740 	case ')':
741 		return c;
742 	case '\\':
743 		rlxval = quoted((char **) &prestr);
744 		return CHAR;
745 	default:
746 		rlxval = c;
747 		return CHAR;
748 	case '[':
749 		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
750 			FATAL("out of space in reg expr %.10s..", lastre);
751 		bp = buf;
752 		if (*prestr == '^') {
753 			cflag = 1;
754 			prestr++;
755 		}
756 		else
757 			cflag = 0;
758 		n = 2 * strlen((const char *) prestr)+1;
759 		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
760 			FATAL("out of space for reg expr %.10s...", lastre);
761 		for (; ; ) {
762 			if ((c = *prestr++) == '\\') {
763 				*bp++ = '\\';
764 				if ((c = *prestr++) == '\0')
765 					FATAL("nonterminated character class %.20s...", lastre);
766 				*bp++ = c;
767 			/* } else if (c == '\n') { */
768 			/* 	FATAL("newline in character class %.20s...", lastre); */
769 			} else if (c == '[' && *prestr == ':') {
770 				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
771 				for (cc = charclasses; cc->cc_name; cc++)
772 					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
773 						break;
774 				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
775 				    prestr[2 + cc->cc_namelen] == ']') {
776 					prestr += cc->cc_namelen + 3;
777 					for (p = (const uschar *) cc->cc_expand; *p; p++)
778 						*bp++ = *p;
779 				} else
780 					*bp++ = c;
781 			} else if (c == '\0') {
782 				FATAL("nonterminated character class %.20s", lastre);
783 			} else if (bp == buf) {	/* 1st char is special */
784 				*bp++ = c;
785 			} else if (c == ']') {
786 				*bp++ = 0;
787 				rlxstr = (uschar *) tostring((char *) buf);
788 				if (cflag == 0)
789 					return CCL;
790 				else
791 					return NCCL;
792 			} else
793 				*bp++ = c;
794 		}
795 	}
796 }
797 
798 int cgoto(fa *f, int s, int c)
799 {
800 	int i, j, k;
801 	int *p, *q;
802 
803 	if (c < 0 || c > 255)
804 		FATAL("can't happen: neg char %d in cgoto", c);
805 	while (f->accept >= maxsetvec) {	/* guessing here! */
806 		maxsetvec *= 4;
807 		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
808 		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
809 		if (setvec == 0 || tmpset == 0)
810 			overflo("out of space in cgoto()");
811 	}
812 	for (i = 0; i <= f->accept; i++)
813 		setvec[i] = 0;
814 	setcnt = 0;
815 	/* compute positions of gototab[s,c] into setvec */
816 	p = f->posns[s];
817 	for (i = 1; i <= *p; i++) {
818 		if ((k = f->re[p[i]].ltype) != FINAL) {
819 			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
820 			 || (k == DOT && c != 0 && c != HAT)
821 			 || (k == ALL && c != 0)
822 			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
823 			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
824 				q = f->re[p[i]].lfollow;
825 				for (j = 1; j <= *q; j++) {
826 					if (q[j] >= maxsetvec) {
827 						maxsetvec *= 4;
828 						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
829 						tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
830 						if (setvec == 0 || tmpset == 0)
831 							overflo("cgoto overflow");
832 					}
833 					if (setvec[q[j]] == 0) {
834 						setcnt++;
835 						setvec[q[j]] = 1;
836 					}
837 				}
838 			}
839 		}
840 	}
841 	/* determine if setvec is a previous state */
842 	tmpset[0] = setcnt;
843 	j = 1;
844 	for (i = f->accept; i >= 0; i--)
845 		if (setvec[i]) {
846 			tmpset[j++] = i;
847 		}
848 	/* tmpset == previous state? */
849 	for (i = 1; i <= f->curstat; i++) {
850 		p = f->posns[i];
851 		if ((k = tmpset[0]) != p[0])
852 			goto different;
853 		for (j = 1; j <= k; j++)
854 			if (tmpset[j] != p[j])
855 				goto different;
856 		/* setvec is state i */
857 		f->gototab[s][c] = i;
858 		return i;
859 	  different:;
860 	}
861 
862 	/* add tmpset to current set of states */
863 	if (f->curstat >= NSTATES-1) {
864 		f->curstat = 2;
865 		f->reset = 1;
866 		for (i = 2; i < NSTATES; i++)
867 			xfree(f->posns[i]);
868 	} else
869 		++(f->curstat);
870 	for (i = 0; i < NCHARS; i++)
871 		f->gototab[f->curstat][i] = 0;
872 	xfree(f->posns[f->curstat]);
873 	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
874 		overflo("out of space in cgoto");
875 
876 	f->posns[f->curstat] = p;
877 	f->gototab[s][c] = f->curstat;
878 	for (i = 0; i <= setcnt; i++)
879 		p[i] = tmpset[i];
880 	if (setvec[f->accept])
881 		f->out[f->curstat] = 1;
882 	else
883 		f->out[f->curstat] = 0;
884 	return f->curstat;
885 }
886 
887 
888 void freefa(fa *f)	/* free a finite automaton */
889 {
890 	int i;
891 
892 	if (f == NULL)
893 		return;
894 	for (i = 0; i <= f->curstat; i++)
895 		xfree(f->posns[i]);
896 	for (i = 0; i <= f->accept; i++) {
897 		xfree(f->re[i].lfollow);
898 		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
899 			xfree((f->re[i].lval.np));
900 	}
901 	xfree(f->restr);
902 	xfree(f);
903 }
904