xref: /openbsd-src/usr.bin/awk/b.c (revision 0b7734b3d77bb9b21afec6f4621cae6c805dbd45)
1 /*	$OpenBSD: b.c,v 1.18 2014/12/19 19:28:55 deraadt Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 /* lasciate ogne speranza, voi ch'intrate. */
27 
28 #define	DEBUG
29 
30 #include <ctype.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <stdlib.h>
34 #include "awk.h"
35 #include "ytab.h"
36 
37 #define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
38 				/* NCHARS is 2**n */
39 #define MAXLIN 22
40 
41 #define type(v)		(v)->nobj	/* badly overloaded here */
42 #define info(v)		(v)->ntype	/* badly overloaded here */
43 #define left(v)		(v)->narg[0]
44 #define right(v)	(v)->narg[1]
45 #define parent(v)	(v)->nnext
46 
47 #define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48 #define ELEAF	case EMPTYRE:		/* empty string in regexp */
49 #define UNARY	case STAR: case PLUS: case QUEST:
50 
51 /* encoding in tree Nodes:
52 	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
53 		left is index, right contains value or pointer to value
54 	unary (STAR, PLUS, QUEST): left is child, right is null
55 	binary (CAT, OR): left and right are children
56 	parent contains pointer to parent
57 */
58 
59 
60 int	*setvec;
61 int	*tmpset;
62 int	maxsetvec = 0;
63 
64 int	rtok;		/* next token in current re */
65 int	rlxval;
66 static uschar	*rlxstr;
67 static uschar	*prestr;	/* current position in current re */
68 static uschar	*lastre;	/* origin of last re */
69 
70 static	int setcnt;
71 static	int poscnt;
72 
73 char	*patbeg;
74 int	patlen;
75 
76 #define	NFA	20	/* cache this many dynamic fa's */
77 fa	*fatab[NFA];
78 int	nfatab	= 0;	/* entries in fatab */
79 
80 fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
81 {
82 	int i, use, nuse;
83 	fa *pfa;
84 	static int now = 1;
85 
86 	if (setvec == 0) {	/* first time through any RE */
87 		maxsetvec = MAXLIN;
88 		setvec = (int *) calloc(maxsetvec, sizeof(int));
89 		tmpset = (int *) calloc(maxsetvec, sizeof(int));
90 		if (setvec == 0 || tmpset == 0)
91 			overflo("out of space initializing makedfa");
92 	}
93 
94 	if (compile_time)	/* a constant for sure */
95 		return mkdfa(s, anchor);
96 	for (i = 0; i < nfatab; i++)	/* is it there already? */
97 		if (fatab[i]->anchor == anchor
98 		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
99 			fatab[i]->use = now++;
100 			return fatab[i];
101 		}
102 	pfa = mkdfa(s, anchor);
103 	if (nfatab < NFA) {	/* room for another */
104 		fatab[nfatab] = pfa;
105 		fatab[nfatab]->use = now++;
106 		nfatab++;
107 		return pfa;
108 	}
109 	use = fatab[0]->use;	/* replace least-recently used */
110 	nuse = 0;
111 	for (i = 1; i < nfatab; i++)
112 		if (fatab[i]->use < use) {
113 			use = fatab[i]->use;
114 			nuse = i;
115 		}
116 	freefa(fatab[nuse]);
117 	fatab[nuse] = pfa;
118 	pfa->use = now++;
119 	return pfa;
120 }
121 
122 fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
123 				/* anchor = 1 for anchored matches, else 0 */
124 {
125 	Node *p, *p1;
126 	fa *f;
127 
128 	p = reparse(s);
129 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
130 		/* put ALL STAR in front of reg.  exp. */
131 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
132 		/* put FINAL after reg.  exp. */
133 
134 	poscnt = 0;
135 	penter(p1);	/* enter parent pointers and leaf indices */
136 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
137 		overflo("out of space for fa");
138 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
139 	cfoll(f, p1);	/* set up follow sets */
140 	freetr(p1);
141 	if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
142 			overflo("out of space in makedfa");
143 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
144 		overflo("out of space in makedfa");
145 	*f->posns[1] = 0;
146 	f->initstat = makeinit(f, anchor);
147 	f->anchor = anchor;
148 	f->restr = (uschar *) tostring(s);
149 	return f;
150 }
151 
152 int makeinit(fa *f, int anchor)
153 {
154 	int i, k;
155 
156 	f->curstat = 2;
157 	f->out[2] = 0;
158 	f->reset = 0;
159 	k = *(f->re[0].lfollow);
160 	xfree(f->posns[2]);
161 	if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
162 		overflo("out of space in makeinit");
163 	for (i=0; i <= k; i++) {
164 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
165 	}
166 	if ((f->posns[2])[1] == f->accept)
167 		f->out[2] = 1;
168 	for (i=0; i < NCHARS; i++)
169 		f->gototab[2][i] = 0;
170 	f->curstat = cgoto(f, 2, HAT);
171 	if (anchor) {
172 		*f->posns[2] = k-1;	/* leave out position 0 */
173 		for (i=0; i < k; i++) {
174 			(f->posns[0])[i] = (f->posns[2])[i];
175 		}
176 
177 		f->out[0] = f->out[2];
178 		if (f->curstat != 2)
179 			--(*f->posns[f->curstat]);
180 	}
181 	return f->curstat;
182 }
183 
184 void penter(Node *p)	/* set up parent pointers and leaf indices */
185 {
186 	switch (type(p)) {
187 	ELEAF
188 	LEAF
189 		info(p) = poscnt;
190 		poscnt++;
191 		break;
192 	UNARY
193 		penter(left(p));
194 		parent(left(p)) = p;
195 		break;
196 	case CAT:
197 	case OR:
198 		penter(left(p));
199 		penter(right(p));
200 		parent(left(p)) = p;
201 		parent(right(p)) = p;
202 		break;
203 	default:	/* can't happen */
204 		FATAL("can't happen: unknown type %d in penter", type(p));
205 		break;
206 	}
207 }
208 
209 void freetr(Node *p)	/* free parse tree */
210 {
211 	switch (type(p)) {
212 	ELEAF
213 	LEAF
214 		xfree(p);
215 		break;
216 	UNARY
217 		freetr(left(p));
218 		xfree(p);
219 		break;
220 	case CAT:
221 	case OR:
222 		freetr(left(p));
223 		freetr(right(p));
224 		xfree(p);
225 		break;
226 	default:	/* can't happen */
227 		FATAL("can't happen: unknown type %d in freetr", type(p));
228 		break;
229 	}
230 }
231 
232 /* in the parsing of regular expressions, metacharacters like . have */
233 /* to be seen literally;  \056 is not a metacharacter. */
234 
235 int hexstr(uschar **pp)	/* find and eval hex string at pp, return new p */
236 {			/* only pick up one 8-bit byte (2 chars) */
237 	uschar *p;
238 	int n = 0;
239 	int i;
240 
241 	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
242 		if (isdigit(*p))
243 			n = 16 * n + *p - '0';
244 		else if (*p >= 'a' && *p <= 'f')
245 			n = 16 * n + *p - 'a' + 10;
246 		else if (*p >= 'A' && *p <= 'F')
247 			n = 16 * n + *p - 'A' + 10;
248 	}
249 	*pp = (uschar *) p;
250 	return n;
251 }
252 
253 #define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
254 
255 int quoted(uschar **pp)	/* pick up next thing after a \\ */
256 			/* and increment *pp */
257 {
258 	uschar *p = *pp;
259 	int c;
260 
261 	if ((c = *p++) == 't')
262 		c = '\t';
263 	else if (c == 'n')
264 		c = '\n';
265 	else if (c == 'f')
266 		c = '\f';
267 	else if (c == 'r')
268 		c = '\r';
269 	else if (c == 'b')
270 		c = '\b';
271 	else if (c == '\\')
272 		c = '\\';
273 	else if (c == 'x') {	/* hexadecimal goo follows */
274 		c = hexstr(&p);	/* this adds a null if number is invalid */
275 	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
276 		int n = c - '0';
277 		if (isoctdigit(*p)) {
278 			n = 8 * n + *p++ - '0';
279 			if (isoctdigit(*p))
280 				n = 8 * n + *p++ - '0';
281 		}
282 		c = n;
283 	} /* else */
284 		/* c = c; */
285 	*pp = p;
286 	return c;
287 }
288 
289 char *cclenter(const char *argp)	/* add a character class */
290 {
291 	int i, c, c2;
292 	uschar *p = (uschar *) argp;
293 	uschar *op, *bp;
294 	static uschar *buf = 0;
295 	static int bufsz = 100;
296 
297 	op = p;
298 	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
299 		FATAL("out of space for character class [%.10s...] 1", p);
300 	bp = buf;
301 	for (i = 0; (c = *p++) != 0; ) {
302 		if (c == '\\') {
303 			c = quoted(&p);
304 		} else if (c == '-' && i > 0 && bp[-1] != 0) {
305 			if (*p != 0) {
306 				c = bp[-1];
307 				c2 = *p++;
308 				if (c2 == '\\')
309 					c2 = quoted(&p);
310 				if (c > c2) {	/* empty; ignore */
311 					bp--;
312 					i--;
313 					continue;
314 				}
315 				while (c < c2) {
316 					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
317 						FATAL("out of space for character class [%.10s...] 2", p);
318 					*bp++ = ++c;
319 					i++;
320 				}
321 				continue;
322 			}
323 		}
324 		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
325 			FATAL("out of space for character class [%.10s...] 3", p);
326 		*bp++ = c;
327 		i++;
328 	}
329 	*bp = 0;
330 	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
331 	xfree(op);
332 	return (char *) tostring((char *) buf);
333 }
334 
335 void overflo(const char *s)
336 {
337 	FATAL("regular expression too big: %.30s...", s);
338 }
339 
340 void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
341 {
342 	int i;
343 	int *p;
344 
345 	switch (type(v)) {
346 	ELEAF
347 	LEAF
348 		f->re[info(v)].ltype = type(v);
349 		f->re[info(v)].lval.np = right(v);
350 		while (f->accept >= maxsetvec) {	/* guessing here! */
351 			setvec = reallocarray(setvec, maxsetvec,
352 			    4 * sizeof(int));
353 			tmpset = reallocarray(tmpset, maxsetvec,
354 			    4 * sizeof(int));
355 			if (setvec == 0 || tmpset == 0)
356 				overflo("out of space in cfoll()");
357 			maxsetvec *= 4;
358 		}
359 		for (i = 0; i <= f->accept; i++)
360 			setvec[i] = 0;
361 		setcnt = 0;
362 		follow(v);	/* computes setvec and setcnt */
363 		if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
364 			overflo("out of space building follow set");
365 		f->re[info(v)].lfollow = p;
366 		*p = setcnt;
367 		for (i = f->accept; i >= 0; i--)
368 			if (setvec[i] == 1)
369 				*++p = i;
370 		break;
371 	UNARY
372 		cfoll(f,left(v));
373 		break;
374 	case CAT:
375 	case OR:
376 		cfoll(f,left(v));
377 		cfoll(f,right(v));
378 		break;
379 	default:	/* can't happen */
380 		FATAL("can't happen: unknown type %d in cfoll", type(v));
381 	}
382 }
383 
384 int first(Node *p)	/* collects initially active leaves of p into setvec */
385 			/* returns 0 if p matches empty string */
386 {
387 	int b, lp;
388 
389 	switch (type(p)) {
390 	ELEAF
391 	LEAF
392 		lp = info(p);	/* look for high-water mark of subscripts */
393 		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
394 			setvec = reallocarray(setvec, maxsetvec,
395 			    4 * sizeof(int));
396 			tmpset = reallocarray(tmpset, maxsetvec,
397 			    4 * sizeof(int));
398 			if (setvec == 0 || tmpset == 0)
399 				overflo("out of space in first()");
400 			maxsetvec *= 4;
401 		}
402 		if (type(p) == EMPTYRE) {
403 			setvec[lp] = 0;
404 			return(0);
405 		}
406 		if (setvec[lp] != 1) {
407 			setvec[lp] = 1;
408 			setcnt++;
409 		}
410 		if (type(p) == CCL && (*(char *) right(p)) == '\0')
411 			return(0);		/* empty CCL */
412 		else return(1);
413 	case PLUS:
414 		if (first(left(p)) == 0) return(0);
415 		return(1);
416 	case STAR:
417 	case QUEST:
418 		first(left(p));
419 		return(0);
420 	case CAT:
421 		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
422 		return(1);
423 	case OR:
424 		b = first(right(p));
425 		if (first(left(p)) == 0 || b == 0) return(0);
426 		return(1);
427 	}
428 	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
429 	return(-1);
430 }
431 
432 void follow(Node *v)	/* collects leaves that can follow v into setvec */
433 {
434 	Node *p;
435 
436 	if (type(v) == FINAL)
437 		return;
438 	p = parent(v);
439 	switch (type(p)) {
440 	case STAR:
441 	case PLUS:
442 		first(v);
443 		follow(p);
444 		return;
445 
446 	case OR:
447 	case QUEST:
448 		follow(p);
449 		return;
450 
451 	case CAT:
452 		if (v == left(p)) {	/* v is left child of p */
453 			if (first(right(p)) == 0) {
454 				follow(p);
455 				return;
456 			}
457 		} else		/* v is right child */
458 			follow(p);
459 		return;
460 	}
461 }
462 
463 int member(int c, const char *sarg)	/* is c in s? */
464 {
465 	uschar *s = (uschar *) sarg;
466 
467 	while (*s)
468 		if (c == *s++)
469 			return(1);
470 	return(0);
471 }
472 
473 int match(fa *f, const char *p0)	/* shortest match ? */
474 {
475 	int s, ns;
476 	uschar *p = (uschar *) p0;
477 
478 	s = f->reset ? makeinit(f,0) : f->initstat;
479 	if (f->out[s])
480 		return(1);
481 	do {
482 		/* assert(*p < NCHARS); */
483 		if ((ns = f->gototab[s][*p]) != 0)
484 			s = ns;
485 		else
486 			s = cgoto(f, s, *p);
487 		if (f->out[s])
488 			return(1);
489 	} while (*p++ != 0);
490 	return(0);
491 }
492 
493 int pmatch(fa *f, const char *p0)	/* longest match, for sub */
494 {
495 	int s, ns;
496 	uschar *p = (uschar *) p0;
497 	uschar *q;
498 	int i, k;
499 
500 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
501 	if (f->reset) {
502 		f->initstat = s = makeinit(f,1);
503 	} else {
504 		s = f->initstat;
505 	}
506 	patbeg = (char *) p;
507 	patlen = -1;
508 	do {
509 		q = p;
510 		do {
511 			if (f->out[s])		/* final state */
512 				patlen = q-p;
513 			/* assert(*q < NCHARS); */
514 			if ((ns = f->gototab[s][*q]) != 0)
515 				s = ns;
516 			else
517 				s = cgoto(f, s, *q);
518 			if (s == 1) {	/* no transition */
519 				if (patlen >= 0) {
520 					patbeg = (char *) p;
521 					return(1);
522 				}
523 				else
524 					goto nextin;	/* no match */
525 			}
526 		} while (*q++ != 0);
527 		if (f->out[s])
528 			patlen = q-p-1;	/* don't count $ */
529 		if (patlen >= 0) {
530 			patbeg = (char *) p;
531 			return(1);
532 		}
533 	nextin:
534 		s = 2;
535 		if (f->reset) {
536 			for (i = 2; i <= f->curstat; i++)
537 				xfree(f->posns[i]);
538 			k = *f->posns[0];
539 			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
540 				overflo("out of space in pmatch");
541 			for (i = 0; i <= k; i++)
542 				(f->posns[2])[i] = (f->posns[0])[i];
543 			f->initstat = f->curstat = 2;
544 			f->out[2] = f->out[0];
545 			for (i = 0; i < NCHARS; i++)
546 				f->gototab[2][i] = 0;
547 		}
548 	} while (*p++ != 0);
549 	return (0);
550 }
551 
552 int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
553 {
554 	int s, ns;
555 	uschar *p = (uschar *) p0;
556 	uschar *q;
557 	int i, k;
558 
559 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
560 	if (f->reset) {
561 		f->initstat = s = makeinit(f,1);
562 	} else {
563 		s = f->initstat;
564 	}
565 	patlen = -1;
566 	while (*p) {
567 		q = p;
568 		do {
569 			if (f->out[s])		/* final state */
570 				patlen = q-p;
571 			/* assert(*q < NCHARS); */
572 			if ((ns = f->gototab[s][*q]) != 0)
573 				s = ns;
574 			else
575 				s = cgoto(f, s, *q);
576 			if (s == 1) {	/* no transition */
577 				if (patlen > 0) {
578 					patbeg = (char *) p;
579 					return(1);
580 				} else
581 					goto nnextin;	/* no nonempty match */
582 			}
583 		} while (*q++ != 0);
584 		if (f->out[s])
585 			patlen = q-p-1;	/* don't count $ */
586 		if (patlen > 0 ) {
587 			patbeg = (char *) p;
588 			return(1);
589 		}
590 	nnextin:
591 		s = 2;
592 		if (f->reset) {
593 			for (i = 2; i <= f->curstat; i++)
594 				xfree(f->posns[i]);
595 			k = *f->posns[0];
596 			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
597 				overflo("out of state space");
598 			for (i = 0; i <= k; i++)
599 				(f->posns[2])[i] = (f->posns[0])[i];
600 			f->initstat = f->curstat = 2;
601 			f->out[2] = f->out[0];
602 			for (i = 0; i < NCHARS; i++)
603 				f->gototab[2][i] = 0;
604 		}
605 		p++;
606 	}
607 	return (0);
608 }
609 
610 Node *reparse(const char *p)	/* parses regular expression pointed to by p */
611 {			/* uses relex() to scan regular expression */
612 	Node *np;
613 
614 	dprintf( ("reparse <%s>\n", p) );
615 	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
616 	rtok = relex();
617 	/* GNU compatibility: an empty regexp matches anything */
618 	if (rtok == '\0') {
619 		/* FATAL("empty regular expression"); previous */
620 		return(op2(EMPTYRE, NIL, NIL));
621 	}
622 	np = regexp();
623 	if (rtok != '\0')
624 		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
625 	return(np);
626 }
627 
628 Node *regexp(void)	/* top-level parse of reg expr */
629 {
630 	return (alt(concat(primary())));
631 }
632 
633 Node *primary(void)
634 {
635 	Node *np;
636 
637 	switch (rtok) {
638 	case CHAR:
639 		np = op2(CHAR, NIL, itonp(rlxval));
640 		rtok = relex();
641 		return (unary(np));
642 	case ALL:
643 		rtok = relex();
644 		return (unary(op2(ALL, NIL, NIL)));
645 	case EMPTYRE:
646 		rtok = relex();
647 		return (unary(op2(ALL, NIL, NIL)));
648 	case DOT:
649 		rtok = relex();
650 		return (unary(op2(DOT, NIL, NIL)));
651 	case CCL:
652 		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
653 		rtok = relex();
654 		return (unary(np));
655 	case NCCL:
656 		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
657 		rtok = relex();
658 		return (unary(np));
659 	case '^':
660 		rtok = relex();
661 		return (unary(op2(CHAR, NIL, itonp(HAT))));
662 	case '$':
663 		rtok = relex();
664 		return (unary(op2(CHAR, NIL, NIL)));
665 	case '(':
666 		rtok = relex();
667 		if (rtok == ')') {	/* special pleading for () */
668 			rtok = relex();
669 			return unary(op2(CCL, NIL, (Node *) tostring("")));
670 		}
671 		np = regexp();
672 		if (rtok == ')') {
673 			rtok = relex();
674 			return (unary(np));
675 		}
676 		else
677 			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
678 	default:
679 		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
680 	}
681 	return 0;	/*NOTREACHED*/
682 }
683 
684 Node *concat(Node *np)
685 {
686 	switch (rtok) {
687 	case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
688 		return (concat(op2(CAT, np, primary())));
689 	}
690 	return (np);
691 }
692 
693 Node *alt(Node *np)
694 {
695 	if (rtok == OR) {
696 		rtok = relex();
697 		return (alt(op2(OR, np, concat(primary()))));
698 	}
699 	return (np);
700 }
701 
702 Node *unary(Node *np)
703 {
704 	switch (rtok) {
705 	case STAR:
706 		rtok = relex();
707 		return (unary(op2(STAR, np, NIL)));
708 	case PLUS:
709 		rtok = relex();
710 		return (unary(op2(PLUS, np, NIL)));
711 	case QUEST:
712 		rtok = relex();
713 		return (unary(op2(QUEST, np, NIL)));
714 	default:
715 		return (np);
716 	}
717 }
718 
719 /*
720  * Character class definitions conformant to the POSIX locale as
721  * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
722  * and operating character sets are both ASCII (ISO646) or supersets
723  * thereof.
724  *
725  * Note that to avoid overflowing the temporary buffer used in
726  * relex(), the expanded character class (prior to range expansion)
727  * must be less than twice the size of their full name.
728  */
729 
730 /* Because isblank doesn't show up in any of the header files on any
731  * system i use, it's defined here.  if some other locale has a richer
732  * definition of "blank", define HAS_ISBLANK and provide your own
733  * version.
734  * the parentheses here are an attempt to find a path through the maze
735  * of macro definition and/or function and/or version provided.  thanks
736  * to nelson beebe for the suggestion; let's see if it works everywhere.
737  */
738 
739 #ifndef HAS_ISBLANK
740 
741 int (xisblank)(int c)
742 {
743 	return c==' ' || c=='\t';
744 }
745 
746 #endif
747 
748 struct charclass {
749 	const char *cc_name;
750 	int cc_namelen;
751 	int (*cc_func)(int);
752 } charclasses[] = {
753 	{ "alnum",	5,	isalnum },
754 	{ "alpha",	5,	isalpha },
755 #ifndef HAS_ISBLANK
756 	{ "blank",	5,	isspace }, /* was isblank */
757 #else
758 	{ "blank",	5,	isblank },
759 #endif
760 	{ "cntrl",	5,	iscntrl },
761 	{ "digit",	5,	isdigit },
762 	{ "graph",	5,	isgraph },
763 	{ "lower",	5,	islower },
764 	{ "print",	5,	isprint },
765 	{ "punct",	5,	ispunct },
766 	{ "space",	5,	isspace },
767 	{ "upper",	5,	isupper },
768 	{ "xdigit",	6,	isxdigit },
769 	{ NULL,		0,	NULL },
770 };
771 
772 
773 int relex(void)		/* lexical analyzer for reparse */
774 {
775 	int c, n;
776 	int cflag;
777 	static uschar *buf = 0;
778 	static int bufsz = 100;
779 	uschar *bp;
780 	struct charclass *cc;
781 	int i;
782 
783 	switch (c = *prestr++) {
784 	case '|': return OR;
785 	case '*': return STAR;
786 	case '+': return PLUS;
787 	case '?': return QUEST;
788 	case '.': return DOT;
789 	case '\0': prestr--; return '\0';
790 	case '^':
791 	case '$':
792 	case '(':
793 	case ')':
794 		return c;
795 	case '\\':
796 		rlxval = quoted(&prestr);
797 		return CHAR;
798 	default:
799 		rlxval = c;
800 		return CHAR;
801 	case '[':
802 		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
803 			FATAL("out of space in reg expr %.10s..", lastre);
804 		bp = buf;
805 		if (*prestr == '^') {
806 			cflag = 1;
807 			prestr++;
808 		}
809 		else
810 			cflag = 0;
811 		n = 2 * strlen((const char *) prestr)+1;
812 		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
813 			FATAL("out of space for reg expr %.10s...", lastre);
814 		for (; ; ) {
815 			if ((c = *prestr++) == '\\') {
816 				*bp++ = '\\';
817 				if ((c = *prestr++) == '\0')
818 					FATAL("nonterminated character class %.20s...", lastre);
819 				*bp++ = c;
820 			/* } else if (c == '\n') { */
821 			/* 	FATAL("newline in character class %.20s...", lastre); */
822 			} else if (c == '[' && *prestr == ':') {
823 				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
824 				for (cc = charclasses; cc->cc_name; cc++)
825 					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
826 						break;
827 				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
828 				    prestr[2 + cc->cc_namelen] == ']') {
829 					prestr += cc->cc_namelen + 3;
830 					for (i = 0; i < NCHARS; i++) {
831 						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
832 						    FATAL("out of space for reg expr %.10s...", lastre);
833 						if (cc->cc_func(i)) {
834 							*bp++ = i;
835 							n++;
836 						}
837 					}
838 				} else
839 					*bp++ = c;
840 			} else if (c == '\0') {
841 				FATAL("nonterminated character class %.20s", lastre);
842 			} else if (bp == buf) {	/* 1st char is special */
843 				*bp++ = c;
844 			} else if (c == ']') {
845 				*bp++ = 0;
846 				rlxstr = (uschar *) tostring((char *) buf);
847 				if (cflag == 0)
848 					return CCL;
849 				else
850 					return NCCL;
851 			} else
852 				*bp++ = c;
853 		}
854 	}
855 }
856 
857 int cgoto(fa *f, int s, int c)
858 {
859 	int i, j, k;
860 	int *p, *q;
861 
862 	assert(c == HAT || c < NCHARS);
863 	while (f->accept >= maxsetvec) {	/* guessing here! */
864 		setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(int));
865 		tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int));
866 		if (setvec == 0 || tmpset == 0)
867 			overflo("out of space in cgoto()");
868 		maxsetvec *= 4;
869 	}
870 	for (i = 0; i <= f->accept; i++)
871 		setvec[i] = 0;
872 	setcnt = 0;
873 	/* compute positions of gototab[s,c] into setvec */
874 	p = f->posns[s];
875 	for (i = 1; i <= *p; i++) {
876 		if ((k = f->re[p[i]].ltype) != FINAL) {
877 			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
878 			 || (k == DOT && c != 0 && c != HAT)
879 			 || (k == ALL && c != 0)
880 			 || (k == EMPTYRE && c != 0)
881 			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
882 			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
883 				q = f->re[p[i]].lfollow;
884 				for (j = 1; j <= *q; j++) {
885 					if (q[j] >= maxsetvec) {
886 						setvec = reallocarray(setvec,
887 						    maxsetvec, 4 * sizeof(int));
888 						tmpset = reallocarray(tmpset,
889 						    maxsetvec, 4 * sizeof(int));
890 						if (setvec == 0 || tmpset == 0)
891 							overflo("cgoto overflow");
892 						maxsetvec *= 4;
893 					}
894 					if (setvec[q[j]] == 0) {
895 						setcnt++;
896 						setvec[q[j]] = 1;
897 					}
898 				}
899 			}
900 		}
901 	}
902 	/* determine if setvec is a previous state */
903 	tmpset[0] = setcnt;
904 	j = 1;
905 	for (i = f->accept; i >= 0; i--)
906 		if (setvec[i]) {
907 			tmpset[j++] = i;
908 		}
909 	/* tmpset == previous state? */
910 	for (i = 1; i <= f->curstat; i++) {
911 		p = f->posns[i];
912 		if ((k = tmpset[0]) != p[0])
913 			goto different;
914 		for (j = 1; j <= k; j++)
915 			if (tmpset[j] != p[j])
916 				goto different;
917 		/* setvec is state i */
918 		f->gototab[s][c] = i;
919 		return i;
920 	  different:;
921 	}
922 
923 	/* add tmpset to current set of states */
924 	if (f->curstat >= NSTATES-1) {
925 		f->curstat = 2;
926 		f->reset = 1;
927 		for (i = 2; i < NSTATES; i++)
928 			xfree(f->posns[i]);
929 	} else
930 		++(f->curstat);
931 	for (i = 0; i < NCHARS; i++)
932 		f->gototab[f->curstat][i] = 0;
933 	xfree(f->posns[f->curstat]);
934 	if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
935 		overflo("out of space in cgoto");
936 
937 	f->posns[f->curstat] = p;
938 	f->gototab[s][c] = f->curstat;
939 	for (i = 0; i <= setcnt; i++)
940 		p[i] = tmpset[i];
941 	if (setvec[f->accept])
942 		f->out[f->curstat] = 1;
943 	else
944 		f->out[f->curstat] = 0;
945 	return f->curstat;
946 }
947 
948 
949 void freefa(fa *f)	/* free a finite automaton */
950 {
951 	int i;
952 
953 	if (f == NULL)
954 		return;
955 	for (i = 0; i <= f->curstat; i++)
956 		xfree(f->posns[i]);
957 	for (i = 0; i <= f->accept; i++) {
958 		xfree(f->re[i].lfollow);
959 		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
960 			xfree((f->re[i].lval.np));
961 	}
962 	xfree(f->restr);
963 	xfree(f);
964 }
965