xref: /openbsd-src/usr.bin/awk/b.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: b.c,v 1.20 2018/01/24 16:28:25 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 /* lasciate ogne speranza, voi ch'intrate. */
27 
28 #define	DEBUG
29 
30 #include <ctype.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <stdlib.h>
34 #include "awk.h"
35 #include "ytab.h"
36 
37 #define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
38 				/* NCHARS is 2**n */
39 #define MAXLIN 22
40 
41 #define type(v)		(v)->nobj	/* badly overloaded here */
42 #define info(v)		(v)->ntype	/* badly overloaded here */
43 #define left(v)		(v)->narg[0]
44 #define right(v)	(v)->narg[1]
45 #define parent(v)	(v)->nnext
46 
47 #define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48 #define ELEAF	case EMPTYRE:		/* empty string in regexp */
49 #define UNARY	case STAR: case PLUS: case QUEST:
50 
51 /* encoding in tree Nodes:
52 	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
53 		left is index, right contains value or pointer to value
54 	unary (STAR, PLUS, QUEST): left is child, right is null
55 	binary (CAT, OR): left and right are children
56 	parent contains pointer to parent
57 */
58 
59 
60 int	*setvec;
61 int	*tmpset;
62 int	maxsetvec = 0;
63 
64 int	rtok;		/* next token in current re */
65 int	rlxval;
66 static uschar	*rlxstr;
67 static uschar	*prestr;	/* current position in current re */
68 static uschar	*lastre;	/* origin of last re */
69 
70 static	int setcnt;
71 static	int poscnt;
72 
73 char	*patbeg;
74 int	patlen;
75 
76 #define	NFA	20	/* cache this many dynamic fa's */
77 fa	*fatab[NFA];
78 int	nfatab	= 0;	/* entries in fatab */
79 
80 fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
81 {
82 	int i, use, nuse;
83 	fa *pfa;
84 	static int now = 1;
85 
86 	if (setvec == 0) {	/* first time through any RE */
87 		maxsetvec = MAXLIN;
88 		setvec = (int *) calloc(maxsetvec, sizeof(int));
89 		tmpset = (int *) calloc(maxsetvec, sizeof(int));
90 		if (setvec == 0 || tmpset == 0)
91 			overflo("out of space initializing makedfa");
92 	}
93 
94 	if (compile_time)	/* a constant for sure */
95 		return mkdfa(s, anchor);
96 	for (i = 0; i < nfatab; i++)	/* is it there already? */
97 		if (fatab[i]->anchor == anchor
98 		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
99 			fatab[i]->use = now++;
100 			return fatab[i];
101 		}
102 	pfa = mkdfa(s, anchor);
103 	if (nfatab < NFA) {	/* room for another */
104 		fatab[nfatab] = pfa;
105 		fatab[nfatab]->use = now++;
106 		nfatab++;
107 		return pfa;
108 	}
109 	use = fatab[0]->use;	/* replace least-recently used */
110 	nuse = 0;
111 	for (i = 1; i < nfatab; i++)
112 		if (fatab[i]->use < use) {
113 			use = fatab[i]->use;
114 			nuse = i;
115 		}
116 	freefa(fatab[nuse]);
117 	fatab[nuse] = pfa;
118 	pfa->use = now++;
119 	return pfa;
120 }
121 
122 fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
123 				/* anchor = 1 for anchored matches, else 0 */
124 {
125 	Node *p, *p1;
126 	fa *f;
127 
128 	p = reparse(s);
129 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
130 		/* put ALL STAR in front of reg.  exp. */
131 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
132 		/* put FINAL after reg.  exp. */
133 
134 	poscnt = 0;
135 	penter(p1);	/* enter parent pointers and leaf indices */
136 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
137 		overflo("out of space for fa");
138 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
139 	cfoll(f, p1);	/* set up follow sets */
140 	freetr(p1);
141 	if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
142 			overflo("out of space in makedfa");
143 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
144 		overflo("out of space in makedfa");
145 	*f->posns[1] = 0;
146 	f->initstat = makeinit(f, anchor);
147 	f->anchor = anchor;
148 	f->restr = (uschar *) tostring(s);
149 	return f;
150 }
151 
152 int makeinit(fa *f, int anchor)
153 {
154 	int i, k;
155 
156 	f->curstat = 2;
157 	f->out[2] = 0;
158 	f->reset = 0;
159 	k = *(f->re[0].lfollow);
160 	xfree(f->posns[2]);
161 	if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
162 		overflo("out of space in makeinit");
163 	for (i=0; i <= k; i++) {
164 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
165 	}
166 	if ((f->posns[2])[1] == f->accept)
167 		f->out[2] = 1;
168 	for (i=0; i < NCHARS; i++)
169 		f->gototab[2][i] = 0;
170 	f->curstat = cgoto(f, 2, HAT);
171 	if (anchor) {
172 		*f->posns[2] = k-1;	/* leave out position 0 */
173 		for (i=0; i < k; i++) {
174 			(f->posns[0])[i] = (f->posns[2])[i];
175 		}
176 
177 		f->out[0] = f->out[2];
178 		if (f->curstat != 2)
179 			--(*f->posns[f->curstat]);
180 	}
181 	return f->curstat;
182 }
183 
184 void penter(Node *p)	/* set up parent pointers and leaf indices */
185 {
186 	switch (type(p)) {
187 	ELEAF
188 	LEAF
189 		info(p) = poscnt;
190 		poscnt++;
191 		break;
192 	UNARY
193 		penter(left(p));
194 		parent(left(p)) = p;
195 		break;
196 	case CAT:
197 	case OR:
198 		penter(left(p));
199 		penter(right(p));
200 		parent(left(p)) = p;
201 		parent(right(p)) = p;
202 		break;
203 	default:	/* can't happen */
204 		FATAL("can't happen: unknown type %d in penter", type(p));
205 		break;
206 	}
207 }
208 
209 void freetr(Node *p)	/* free parse tree */
210 {
211 	switch (type(p)) {
212 	ELEAF
213 	LEAF
214 		xfree(p);
215 		break;
216 	UNARY
217 		freetr(left(p));
218 		xfree(p);
219 		break;
220 	case CAT:
221 	case OR:
222 		freetr(left(p));
223 		freetr(right(p));
224 		xfree(p);
225 		break;
226 	default:	/* can't happen */
227 		FATAL("can't happen: unknown type %d in freetr", type(p));
228 		break;
229 	}
230 }
231 
232 /* in the parsing of regular expressions, metacharacters like . have */
233 /* to be seen literally;  \056 is not a metacharacter. */
234 
235 int hexstr(uschar **pp)	/* find and eval hex string at pp, return new p */
236 {			/* only pick up one 8-bit byte (2 chars) */
237 	uschar *p;
238 	int n = 0;
239 	int i;
240 
241 	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
242 		if (isdigit(*p))
243 			n = 16 * n + *p - '0';
244 		else if (*p >= 'a' && *p <= 'f')
245 			n = 16 * n + *p - 'a' + 10;
246 		else if (*p >= 'A' && *p <= 'F')
247 			n = 16 * n + *p - 'A' + 10;
248 	}
249 	*pp = (uschar *) p;
250 	return n;
251 }
252 
253 #define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
254 
255 int quoted(uschar **pp)	/* pick up next thing after a \\ */
256 			/* and increment *pp */
257 {
258 	uschar *p = *pp;
259 	int c;
260 
261 	if ((c = *p++) == 't')
262 		c = '\t';
263 	else if (c == 'v')
264 		c = '\v';
265 	else if (c == 'n')
266 		c = '\n';
267 	else if (c == 'f')
268 		c = '\f';
269 	else if (c == 'r')
270 		c = '\r';
271 	else if (c == 'b')
272 		c = '\b';
273 	else if (c == 'a')
274 		c = '\007';
275 	else if (c == '\\')
276 		c = '\\';
277 	else if (c == 'x') {	/* hexadecimal goo follows */
278 		c = hexstr(&p);	/* this adds a null if number is invalid */
279 	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
280 		int n = c - '0';
281 		if (isoctdigit(*p)) {
282 			n = 8 * n + *p++ - '0';
283 			if (isoctdigit(*p))
284 				n = 8 * n + *p++ - '0';
285 		}
286 		c = n;
287 	} /* else */
288 		/* c = c; */
289 	*pp = p;
290 	return c;
291 }
292 
293 char *cclenter(const char *argp)	/* add a character class */
294 {
295 	int i, c, c2;
296 	uschar *p = (uschar *) argp;
297 	uschar *op, *bp;
298 	static uschar *buf = 0;
299 	static int bufsz = 100;
300 
301 	op = p;
302 	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
303 		FATAL("out of space for character class [%.10s...] 1", p);
304 	bp = buf;
305 	for (i = 0; (c = *p++) != 0; ) {
306 		if (c == '\\') {
307 			c = quoted(&p);
308 		} else if (c == '-' && i > 0 && bp[-1] != 0) {
309 			if (*p != 0) {
310 				c = bp[-1];
311 				c2 = *p++;
312 				if (c2 == '\\')
313 					c2 = quoted(&p);
314 				if (c > c2) {	/* empty; ignore */
315 					bp--;
316 					i--;
317 					continue;
318 				}
319 				while (c < c2) {
320 					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
321 						FATAL("out of space for character class [%.10s...] 2", p);
322 					*bp++ = ++c;
323 					i++;
324 				}
325 				continue;
326 			}
327 		}
328 		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
329 			FATAL("out of space for character class [%.10s...] 3", p);
330 		*bp++ = c;
331 		i++;
332 	}
333 	*bp = 0;
334 	DPRINTF( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
335 	xfree(op);
336 	return (char *) tostring((char *) buf);
337 }
338 
339 void overflo(const char *s)
340 {
341 	FATAL("regular expression too big: %.30s...", s);
342 }
343 
344 void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
345 {
346 	int i;
347 	int *p;
348 
349 	switch (type(v)) {
350 	ELEAF
351 	LEAF
352 		f->re[info(v)].ltype = type(v);
353 		f->re[info(v)].lval.np = right(v);
354 		while (f->accept >= maxsetvec) {	/* guessing here! */
355 			setvec = reallocarray(setvec, maxsetvec,
356 			    4 * sizeof(int));
357 			tmpset = reallocarray(tmpset, maxsetvec,
358 			    4 * sizeof(int));
359 			if (setvec == 0 || tmpset == 0)
360 				overflo("out of space in cfoll()");
361 			maxsetvec *= 4;
362 		}
363 		for (i = 0; i <= f->accept; i++)
364 			setvec[i] = 0;
365 		setcnt = 0;
366 		follow(v);	/* computes setvec and setcnt */
367 		if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
368 			overflo("out of space building follow set");
369 		f->re[info(v)].lfollow = p;
370 		*p = setcnt;
371 		for (i = f->accept; i >= 0; i--)
372 			if (setvec[i] == 1)
373 				*++p = i;
374 		break;
375 	UNARY
376 		cfoll(f,left(v));
377 		break;
378 	case CAT:
379 	case OR:
380 		cfoll(f,left(v));
381 		cfoll(f,right(v));
382 		break;
383 	default:	/* can't happen */
384 		FATAL("can't happen: unknown type %d in cfoll", type(v));
385 	}
386 }
387 
388 int first(Node *p)	/* collects initially active leaves of p into setvec */
389 			/* returns 0 if p matches empty string */
390 {
391 	int b, lp;
392 
393 	switch (type(p)) {
394 	ELEAF
395 	LEAF
396 		lp = info(p);	/* look for high-water mark of subscripts */
397 		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
398 			setvec = reallocarray(setvec, maxsetvec,
399 			    4 * sizeof(int));
400 			tmpset = reallocarray(tmpset, maxsetvec,
401 			    4 * sizeof(int));
402 			if (setvec == 0 || tmpset == 0)
403 				overflo("out of space in first()");
404 			maxsetvec *= 4;
405 		}
406 		if (type(p) == EMPTYRE) {
407 			setvec[lp] = 0;
408 			return(0);
409 		}
410 		if (setvec[lp] != 1) {
411 			setvec[lp] = 1;
412 			setcnt++;
413 		}
414 		if (type(p) == CCL && (*(char *) right(p)) == '\0')
415 			return(0);		/* empty CCL */
416 		else return(1);
417 	case PLUS:
418 		if (first(left(p)) == 0) return(0);
419 		return(1);
420 	case STAR:
421 	case QUEST:
422 		first(left(p));
423 		return(0);
424 	case CAT:
425 		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
426 		return(1);
427 	case OR:
428 		b = first(right(p));
429 		if (first(left(p)) == 0 || b == 0) return(0);
430 		return(1);
431 	}
432 	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
433 	return(-1);
434 }
435 
436 void follow(Node *v)	/* collects leaves that can follow v into setvec */
437 {
438 	Node *p;
439 
440 	if (type(v) == FINAL)
441 		return;
442 	p = parent(v);
443 	switch (type(p)) {
444 	case STAR:
445 	case PLUS:
446 		first(v);
447 		follow(p);
448 		return;
449 
450 	case OR:
451 	case QUEST:
452 		follow(p);
453 		return;
454 
455 	case CAT:
456 		if (v == left(p)) {	/* v is left child of p */
457 			if (first(right(p)) == 0) {
458 				follow(p);
459 				return;
460 			}
461 		} else		/* v is right child */
462 			follow(p);
463 		return;
464 	}
465 }
466 
467 int member(int c, const char *sarg)	/* is c in s? */
468 {
469 	uschar *s = (uschar *) sarg;
470 
471 	while (*s)
472 		if (c == *s++)
473 			return(1);
474 	return(0);
475 }
476 
477 int match(fa *f, const char *p0)	/* shortest match ? */
478 {
479 	int s, ns;
480 	uschar *p = (uschar *) p0;
481 
482 	s = f->reset ? makeinit(f,0) : f->initstat;
483 	if (f->out[s])
484 		return(1);
485 	do {
486 		/* assert(*p < NCHARS); */
487 		if ((ns = f->gototab[s][*p]) != 0)
488 			s = ns;
489 		else
490 			s = cgoto(f, s, *p);
491 		if (f->out[s])
492 			return(1);
493 	} while (*p++ != 0);
494 	return(0);
495 }
496 
497 int pmatch(fa *f, const char *p0)	/* longest match, for sub */
498 {
499 	int s, ns;
500 	uschar *p = (uschar *) p0;
501 	uschar *q;
502 	int i, k;
503 
504 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
505 	if (f->reset) {
506 		f->initstat = s = makeinit(f,1);
507 	} else {
508 		s = f->initstat;
509 	}
510 	patbeg = (char *) p;
511 	patlen = -1;
512 	do {
513 		q = p;
514 		do {
515 			if (f->out[s])		/* final state */
516 				patlen = q-p;
517 			/* assert(*q < NCHARS); */
518 			if ((ns = f->gototab[s][*q]) != 0)
519 				s = ns;
520 			else
521 				s = cgoto(f, s, *q);
522 			if (s == 1) {	/* no transition */
523 				if (patlen >= 0) {
524 					patbeg = (char *) p;
525 					return(1);
526 				}
527 				else
528 					goto nextin;	/* no match */
529 			}
530 		} while (*q++ != 0);
531 		if (f->out[s])
532 			patlen = q-p-1;	/* don't count $ */
533 		if (patlen >= 0) {
534 			patbeg = (char *) p;
535 			return(1);
536 		}
537 	nextin:
538 		s = 2;
539 		if (f->reset) {
540 			for (i = 2; i <= f->curstat; i++)
541 				xfree(f->posns[i]);
542 			k = *f->posns[0];
543 			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
544 				overflo("out of space in pmatch");
545 			for (i = 0; i <= k; i++)
546 				(f->posns[2])[i] = (f->posns[0])[i];
547 			f->initstat = f->curstat = 2;
548 			f->out[2] = f->out[0];
549 			for (i = 0; i < NCHARS; i++)
550 				f->gototab[2][i] = 0;
551 		}
552 	} while (*p++ != 0);
553 	return (0);
554 }
555 
556 int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
557 {
558 	int s, ns;
559 	uschar *p = (uschar *) p0;
560 	uschar *q;
561 	int i, k;
562 
563 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
564 	if (f->reset) {
565 		f->initstat = s = makeinit(f,1);
566 	} else {
567 		s = f->initstat;
568 	}
569 	patlen = -1;
570 	while (*p) {
571 		q = p;
572 		do {
573 			if (f->out[s])		/* final state */
574 				patlen = q-p;
575 			/* assert(*q < NCHARS); */
576 			if ((ns = f->gototab[s][*q]) != 0)
577 				s = ns;
578 			else
579 				s = cgoto(f, s, *q);
580 			if (s == 1) {	/* no transition */
581 				if (patlen > 0) {
582 					patbeg = (char *) p;
583 					return(1);
584 				} else
585 					goto nnextin;	/* no nonempty match */
586 			}
587 		} while (*q++ != 0);
588 		if (f->out[s])
589 			patlen = q-p-1;	/* don't count $ */
590 		if (patlen > 0 ) {
591 			patbeg = (char *) p;
592 			return(1);
593 		}
594 	nnextin:
595 		s = 2;
596 		if (f->reset) {
597 			for (i = 2; i <= f->curstat; i++)
598 				xfree(f->posns[i]);
599 			k = *f->posns[0];
600 			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
601 				overflo("out of state space");
602 			for (i = 0; i <= k; i++)
603 				(f->posns[2])[i] = (f->posns[0])[i];
604 			f->initstat = f->curstat = 2;
605 			f->out[2] = f->out[0];
606 			for (i = 0; i < NCHARS; i++)
607 				f->gototab[2][i] = 0;
608 		}
609 		p++;
610 	}
611 	return (0);
612 }
613 
614 Node *reparse(const char *p)	/* parses regular expression pointed to by p */
615 {			/* uses relex() to scan regular expression */
616 	Node *np;
617 
618 	DPRINTF( ("reparse <%s>\n", p) );
619 	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
620 	rtok = relex();
621 	/* GNU compatibility: an empty regexp matches anything */
622 	if (rtok == '\0') {
623 		/* FATAL("empty regular expression"); previous */
624 		return(op2(EMPTYRE, NIL, NIL));
625 	}
626 	np = regexp();
627 	if (rtok != '\0')
628 		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
629 	return(np);
630 }
631 
632 Node *regexp(void)	/* top-level parse of reg expr */
633 {
634 	return (alt(concat(primary())));
635 }
636 
637 Node *primary(void)
638 {
639 	Node *np;
640 
641 	switch (rtok) {
642 	case CHAR:
643 		np = op2(CHAR, NIL, itonp(rlxval));
644 		rtok = relex();
645 		return (unary(np));
646 	case ALL:
647 		rtok = relex();
648 		return (unary(op2(ALL, NIL, NIL)));
649 	case EMPTYRE:
650 		rtok = relex();
651 		return (unary(op2(ALL, NIL, NIL)));
652 	case DOT:
653 		rtok = relex();
654 		return (unary(op2(DOT, NIL, NIL)));
655 	case CCL:
656 		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
657 		rtok = relex();
658 		return (unary(np));
659 	case NCCL:
660 		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
661 		rtok = relex();
662 		return (unary(np));
663 	case '^':
664 		rtok = relex();
665 		return (unary(op2(CHAR, NIL, itonp(HAT))));
666 	case '$':
667 		rtok = relex();
668 		return (unary(op2(CHAR, NIL, NIL)));
669 	case '(':
670 		rtok = relex();
671 		if (rtok == ')') {	/* special pleading for () */
672 			rtok = relex();
673 			return unary(op2(CCL, NIL, (Node *) tostring("")));
674 		}
675 		np = regexp();
676 		if (rtok == ')') {
677 			rtok = relex();
678 			return (unary(np));
679 		}
680 		else
681 			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
682 	default:
683 		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
684 	}
685 	return 0;	/*NOTREACHED*/
686 }
687 
688 Node *concat(Node *np)
689 {
690 	switch (rtok) {
691 	case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
692 		return (concat(op2(CAT, np, primary())));
693 	}
694 	return (np);
695 }
696 
697 Node *alt(Node *np)
698 {
699 	if (rtok == OR) {
700 		rtok = relex();
701 		return (alt(op2(OR, np, concat(primary()))));
702 	}
703 	return (np);
704 }
705 
706 Node *unary(Node *np)
707 {
708 	switch (rtok) {
709 	case STAR:
710 		rtok = relex();
711 		return (unary(op2(STAR, np, NIL)));
712 	case PLUS:
713 		rtok = relex();
714 		return (unary(op2(PLUS, np, NIL)));
715 	case QUEST:
716 		rtok = relex();
717 		return (unary(op2(QUEST, np, NIL)));
718 	default:
719 		return (np);
720 	}
721 }
722 
723 /*
724  * Character class definitions conformant to the POSIX locale as
725  * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
726  * and operating character sets are both ASCII (ISO646) or supersets
727  * thereof.
728  *
729  * Note that to avoid overflowing the temporary buffer used in
730  * relex(), the expanded character class (prior to range expansion)
731  * must be less than twice the size of their full name.
732  */
733 
734 /* Because isblank doesn't show up in any of the header files on any
735  * system i use, it's defined here.  if some other locale has a richer
736  * definition of "blank", define HAS_ISBLANK and provide your own
737  * version.
738  * the parentheses here are an attempt to find a path through the maze
739  * of macro definition and/or function and/or version provided.  thanks
740  * to nelson beebe for the suggestion; let's see if it works everywhere.
741  */
742 
743 #ifndef HAS_ISBLANK
744 
745 int (xisblank)(int c)
746 {
747 	return c==' ' || c=='\t';
748 }
749 
750 #endif
751 
752 struct charclass {
753 	const char *cc_name;
754 	int cc_namelen;
755 	int (*cc_func)(int);
756 } charclasses[] = {
757 	{ "alnum",	5,	isalnum },
758 	{ "alpha",	5,	isalpha },
759 #ifndef HAS_ISBLANK
760 	{ "blank",	5,	isspace }, /* was isblank */
761 #else
762 	{ "blank",	5,	isblank },
763 #endif
764 	{ "cntrl",	5,	iscntrl },
765 	{ "digit",	5,	isdigit },
766 	{ "graph",	5,	isgraph },
767 	{ "lower",	5,	islower },
768 	{ "print",	5,	isprint },
769 	{ "punct",	5,	ispunct },
770 	{ "space",	5,	isspace },
771 	{ "upper",	5,	isupper },
772 	{ "xdigit",	6,	isxdigit },
773 	{ NULL,		0,	NULL },
774 };
775 
776 
777 int relex(void)		/* lexical analyzer for reparse */
778 {
779 	int c, n;
780 	int cflag;
781 	static uschar *buf = 0;
782 	static int bufsz = 100;
783 	uschar *bp;
784 	struct charclass *cc;
785 	int i;
786 
787 	switch (c = *prestr++) {
788 	case '|': return OR;
789 	case '*': return STAR;
790 	case '+': return PLUS;
791 	case '?': return QUEST;
792 	case '.': return DOT;
793 	case '\0': prestr--; return '\0';
794 	case '^':
795 	case '$':
796 	case '(':
797 	case ')':
798 		return c;
799 	case '\\':
800 		rlxval = quoted(&prestr);
801 		return CHAR;
802 	default:
803 		rlxval = c;
804 		return CHAR;
805 	case '[':
806 		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
807 			FATAL("out of space in reg expr %.10s..", lastre);
808 		bp = buf;
809 		if (*prestr == '^') {
810 			cflag = 1;
811 			prestr++;
812 		}
813 		else
814 			cflag = 0;
815 		n = 2 * strlen((const char *) prestr)+1;
816 		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
817 			FATAL("out of space for reg expr %.10s...", lastre);
818 		for (; ; ) {
819 			if ((c = *prestr++) == '\\') {
820 				*bp++ = '\\';
821 				if ((c = *prestr++) == '\0')
822 					FATAL("nonterminated character class %.20s...", lastre);
823 				*bp++ = c;
824 			/* } else if (c == '\n') { */
825 			/* 	FATAL("newline in character class %.20s...", lastre); */
826 			} else if (c == '[' && *prestr == ':') {
827 				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
828 				for (cc = charclasses; cc->cc_name; cc++)
829 					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
830 						break;
831 				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
832 				    prestr[2 + cc->cc_namelen] == ']') {
833 					prestr += cc->cc_namelen + 3;
834 					for (i = 0; i < NCHARS; i++) {
835 						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
836 						    FATAL("out of space for reg expr %.10s...", lastre);
837 						if (cc->cc_func(i)) {
838 							*bp++ = i;
839 							n++;
840 						}
841 					}
842 				} else
843 					*bp++ = c;
844 			} else if (c == '\0') {
845 				FATAL("nonterminated character class %.20s", lastre);
846 			} else if (bp == buf) {	/* 1st char is special */
847 				*bp++ = c;
848 			} else if (c == ']') {
849 				*bp++ = 0;
850 				rlxstr = (uschar *) tostring((char *) buf);
851 				if (cflag == 0)
852 					return CCL;
853 				else
854 					return NCCL;
855 			} else
856 				*bp++ = c;
857 		}
858 	}
859 }
860 
861 int cgoto(fa *f, int s, int c)
862 {
863 	int i, j, k;
864 	int *p, *q;
865 
866 	assert(c == HAT || c < NCHARS);
867 	while (f->accept >= maxsetvec) {	/* guessing here! */
868 		setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(int));
869 		tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int));
870 		if (setvec == 0 || tmpset == 0)
871 			overflo("out of space in cgoto()");
872 		maxsetvec *= 4;
873 	}
874 	for (i = 0; i <= f->accept; i++)
875 		setvec[i] = 0;
876 	setcnt = 0;
877 	/* compute positions of gototab[s,c] into setvec */
878 	p = f->posns[s];
879 	for (i = 1; i <= *p; i++) {
880 		if ((k = f->re[p[i]].ltype) != FINAL) {
881 			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
882 			 || (k == DOT && c != 0 && c != HAT)
883 			 || (k == ALL && c != 0)
884 			 || (k == EMPTYRE && c != 0)
885 			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
886 			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
887 				q = f->re[p[i]].lfollow;
888 				for (j = 1; j <= *q; j++) {
889 					if (q[j] >= maxsetvec) {
890 						setvec = reallocarray(setvec,
891 						    maxsetvec, 4 * sizeof(int));
892 						tmpset = reallocarray(tmpset,
893 						    maxsetvec, 4 * sizeof(int));
894 						if (setvec == 0 || tmpset == 0)
895 							overflo("cgoto overflow");
896 						maxsetvec *= 4;
897 					}
898 					if (setvec[q[j]] == 0) {
899 						setcnt++;
900 						setvec[q[j]] = 1;
901 					}
902 				}
903 			}
904 		}
905 	}
906 	/* determine if setvec is a previous state */
907 	tmpset[0] = setcnt;
908 	j = 1;
909 	for (i = f->accept; i >= 0; i--)
910 		if (setvec[i]) {
911 			tmpset[j++] = i;
912 		}
913 	/* tmpset == previous state? */
914 	for (i = 1; i <= f->curstat; i++) {
915 		p = f->posns[i];
916 		if ((k = tmpset[0]) != p[0])
917 			goto different;
918 		for (j = 1; j <= k; j++)
919 			if (tmpset[j] != p[j])
920 				goto different;
921 		/* setvec is state i */
922 		f->gototab[s][c] = i;
923 		return i;
924 	  different:;
925 	}
926 
927 	/* add tmpset to current set of states */
928 	if (f->curstat >= NSTATES-1) {
929 		f->curstat = 2;
930 		f->reset = 1;
931 		for (i = 2; i < NSTATES; i++)
932 			xfree(f->posns[i]);
933 	} else
934 		++(f->curstat);
935 	for (i = 0; i < NCHARS; i++)
936 		f->gototab[f->curstat][i] = 0;
937 	xfree(f->posns[f->curstat]);
938 	if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
939 		overflo("out of space in cgoto");
940 
941 	f->posns[f->curstat] = p;
942 	f->gototab[s][c] = f->curstat;
943 	for (i = 0; i <= setcnt; i++)
944 		p[i] = tmpset[i];
945 	if (setvec[f->accept])
946 		f->out[f->curstat] = 1;
947 	else
948 		f->out[f->curstat] = 0;
949 	return f->curstat;
950 }
951 
952 
953 void freefa(fa *f)	/* free a finite automaton */
954 {
955 	int i;
956 
957 	if (f == NULL)
958 		return;
959 	for (i = 0; i <= f->curstat; i++)
960 		xfree(f->posns[i]);
961 	for (i = 0; i <= f->accept; i++) {
962 		xfree(f->re[i].lfollow);
963 		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
964 			xfree((f->re[i].lval.np));
965 	}
966 	xfree(f->restr);
967 	xfree(f);
968 }
969