xref: /openbsd-src/usr.bin/awk/b.c (revision 850e275390052b330d93020bf619a739a3c277ac)
1 /*	$OpenBSD: b.c,v 1.14 2007/09/02 15:19:31 deraadt Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 /* lasciate ogne speranza, voi ch'entrate. */
27 
28 #define	DEBUG
29 
30 #include <ctype.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <stdlib.h>
34 #include "awk.h"
35 #include "ytab.h"
36 
37 #define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
38 				/* NCHARS is 2**n */
39 #define MAXLIN 22
40 
41 #define type(v)		(v)->nobj	/* badly overloaded here */
42 #define info(v)		(v)->ntype	/* badly overloaded here */
43 #define left(v)		(v)->narg[0]
44 #define right(v)	(v)->narg[1]
45 #define parent(v)	(v)->nnext
46 
47 #define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48 #define UNARY	case STAR: case PLUS: case QUEST:
49 
50 /* encoding in tree Nodes:
51 	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
52 		left is index, right contains value or pointer to value
53 	unary (STAR, PLUS, QUEST): left is child, right is null
54 	binary (CAT, OR): left and right are children
55 	parent contains pointer to parent
56 */
57 
58 
59 int	*setvec;
60 int	*tmpset;
61 int	maxsetvec = 0;
62 
63 int	rtok;		/* next token in current re */
64 int	rlxval;
65 static uschar	*rlxstr;
66 static uschar	*prestr;	/* current position in current re */
67 static uschar	*lastre;	/* origin of last re */
68 
69 static	int setcnt;
70 static	int poscnt;
71 
72 char	*patbeg;
73 int	patlen;
74 
75 #define	NFA	20	/* cache this many dynamic fa's */
76 fa	*fatab[NFA];
77 int	nfatab	= 0;	/* entries in fatab */
78 
79 fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
80 {
81 	int i, use, nuse;
82 	fa *pfa;
83 	static int now = 1;
84 
85 	if (setvec == 0) {	/* first time through any RE */
86 		maxsetvec = MAXLIN;
87 		setvec = (int *) calloc(maxsetvec, sizeof(int));
88 		tmpset = (int *) calloc(maxsetvec, sizeof(int));
89 		if (setvec == 0 || tmpset == 0)
90 			overflo("out of space initializing makedfa");
91 	}
92 
93 	if (compile_time)	/* a constant for sure */
94 		return mkdfa(s, anchor);
95 	for (i = 0; i < nfatab; i++)	/* is it there already? */
96 		if (fatab[i]->anchor == anchor
97 		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
98 			fatab[i]->use = now++;
99 			return fatab[i];
100 		}
101 	pfa = mkdfa(s, anchor);
102 	if (nfatab < NFA) {	/* room for another */
103 		fatab[nfatab] = pfa;
104 		fatab[nfatab]->use = now++;
105 		nfatab++;
106 		return pfa;
107 	}
108 	use = fatab[0]->use;	/* replace least-recently used */
109 	nuse = 0;
110 	for (i = 1; i < nfatab; i++)
111 		if (fatab[i]->use < use) {
112 			use = fatab[i]->use;
113 			nuse = i;
114 		}
115 	freefa(fatab[nuse]);
116 	fatab[nuse] = pfa;
117 	pfa->use = now++;
118 	return pfa;
119 }
120 
121 fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
122 				/* anchor = 1 for anchored matches, else 0 */
123 {
124 	Node *p, *p1;
125 	fa *f;
126 
127 	p = reparse(s);
128 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
129 		/* put ALL STAR in front of reg.  exp. */
130 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
131 		/* put FINAL after reg.  exp. */
132 
133 	poscnt = 0;
134 	penter(p1);	/* enter parent pointers and leaf indices */
135 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
136 		overflo("out of space for fa");
137 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
138 	cfoll(f, p1);	/* set up follow sets */
139 	freetr(p1);
140 	if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
141 			overflo("out of space in makedfa");
142 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
143 		overflo("out of space in makedfa");
144 	*f->posns[1] = 0;
145 	f->initstat = makeinit(f, anchor);
146 	f->anchor = anchor;
147 	f->restr = (uschar *) tostring(s);
148 	return f;
149 }
150 
151 int makeinit(fa *f, int anchor)
152 {
153 	int i, k;
154 
155 	f->curstat = 2;
156 	f->out[2] = 0;
157 	f->reset = 0;
158 	k = *(f->re[0].lfollow);
159 	xfree(f->posns[2]);
160 	if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
161 		overflo("out of space in makeinit");
162 	for (i=0; i <= k; i++) {
163 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
164 	}
165 	if ((f->posns[2])[1] == f->accept)
166 		f->out[2] = 1;
167 	for (i=0; i < NCHARS; i++)
168 		f->gototab[2][i] = 0;
169 	f->curstat = cgoto(f, 2, HAT);
170 	if (anchor) {
171 		*f->posns[2] = k-1;	/* leave out position 0 */
172 		for (i=0; i < k; i++) {
173 			(f->posns[0])[i] = (f->posns[2])[i];
174 		}
175 
176 		f->out[0] = f->out[2];
177 		if (f->curstat != 2)
178 			--(*f->posns[f->curstat]);
179 	}
180 	return f->curstat;
181 }
182 
183 void penter(Node *p)	/* set up parent pointers and leaf indices */
184 {
185 	switch (type(p)) {
186 	LEAF
187 		info(p) = poscnt;
188 		poscnt++;
189 		break;
190 	UNARY
191 		penter(left(p));
192 		parent(left(p)) = p;
193 		break;
194 	case CAT:
195 	case OR:
196 		penter(left(p));
197 		penter(right(p));
198 		parent(left(p)) = p;
199 		parent(right(p)) = p;
200 		break;
201 	default:	/* can't happen */
202 		FATAL("can't happen: unknown type %d in penter", type(p));
203 		break;
204 	}
205 }
206 
207 void freetr(Node *p)	/* free parse tree */
208 {
209 	switch (type(p)) {
210 	LEAF
211 		xfree(p);
212 		break;
213 	UNARY
214 		freetr(left(p));
215 		xfree(p);
216 		break;
217 	case CAT:
218 	case OR:
219 		freetr(left(p));
220 		freetr(right(p));
221 		xfree(p);
222 		break;
223 	default:	/* can't happen */
224 		FATAL("can't happen: unknown type %d in freetr", type(p));
225 		break;
226 	}
227 }
228 
229 /* in the parsing of regular expressions, metacharacters like . have */
230 /* to be seen literally;  \056 is not a metacharacter. */
231 
232 int hexstr(char **pp)	/* find and eval hex string at pp, return new p */
233 {			/* only pick up one 8-bit byte (2 chars) */
234 	uschar *p;
235 	int n = 0;
236 	int i;
237 
238 	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
239 		if (isdigit(*p))
240 			n = 16 * n + *p - '0';
241 		else if (*p >= 'a' && *p <= 'f')
242 			n = 16 * n + *p - 'a' + 10;
243 		else if (*p >= 'A' && *p <= 'F')
244 			n = 16 * n + *p - 'A' + 10;
245 	}
246 	*pp = (char *) p;
247 	return n;
248 }
249 
250 #define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
251 
252 int quoted(char **pp)	/* pick up next thing after a \\ */
253 			/* and increment *pp */
254 {
255 	char *p = *pp;
256 	int c;
257 
258 	if ((c = *p++) == 't')
259 		c = '\t';
260 	else if (c == 'n')
261 		c = '\n';
262 	else if (c == 'f')
263 		c = '\f';
264 	else if (c == 'r')
265 		c = '\r';
266 	else if (c == 'b')
267 		c = '\b';
268 	else if (c == '\\')
269 		c = '\\';
270 	else if (c == 'x') {	/* hexadecimal goo follows */
271 		c = hexstr(&p);	/* this adds a null if number is invalid */
272 	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
273 		int n = c - '0';
274 		if (isoctdigit(*p)) {
275 			n = 8 * n + *p++ - '0';
276 			if (isoctdigit(*p))
277 				n = 8 * n + *p++ - '0';
278 		}
279 		c = n;
280 	} /* else */
281 		/* c = c; */
282 	*pp = p;
283 	return c;
284 }
285 
286 char *cclenter(const char *argp)	/* add a character class */
287 {
288 	int i, c, c2;
289 	uschar *p = (uschar *) argp;
290 	uschar *op, *bp;
291 	static uschar *buf = 0;
292 	static int bufsz = 100;
293 
294 	op = p;
295 	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
296 		FATAL("out of space for character class [%.10s...] 1", p);
297 	bp = buf;
298 	for (i = 0; (c = *p++) != 0; ) {
299 		if (c == '\\') {
300 			c = quoted((char **) &p);
301 		} else if (c == '-' && i > 0 && bp[-1] != 0) {
302 			if (*p != 0) {
303 				c = bp[-1];
304 				c2 = *p++;
305 				if (c2 == '\\')
306 					c2 = quoted((char **) &p);
307 				if (c > c2) {	/* empty; ignore */
308 					bp--;
309 					i--;
310 					continue;
311 				}
312 				while (c < c2) {
313 					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
314 						FATAL("out of space for character class [%.10s...] 2", p);
315 					*bp++ = ++c;
316 					i++;
317 				}
318 				continue;
319 			}
320 		}
321 		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
322 			FATAL("out of space for character class [%.10s...] 3", p);
323 		*bp++ = c;
324 		i++;
325 	}
326 	*bp = 0;
327 	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
328 	xfree(op);
329 	return (char *) tostring((char *) buf);
330 }
331 
332 void overflo(const char *s)
333 {
334 	FATAL("regular expression too big: %.30s...", s);
335 }
336 
337 void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
338 {
339 	int i;
340 	int *p;
341 
342 	switch (type(v)) {
343 	LEAF
344 		f->re[info(v)].ltype = type(v);
345 		f->re[info(v)].lval.np = right(v);
346 		while (f->accept >= maxsetvec) {	/* guessing here! */
347 			maxsetvec *= 4;
348 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
349 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
350 			if (setvec == 0 || tmpset == 0)
351 				overflo("out of space in cfoll()");
352 		}
353 		for (i = 0; i <= f->accept; i++)
354 			setvec[i] = 0;
355 		setcnt = 0;
356 		follow(v);	/* computes setvec and setcnt */
357 		if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
358 			overflo("out of space building follow set");
359 		f->re[info(v)].lfollow = p;
360 		*p = setcnt;
361 		for (i = f->accept; i >= 0; i--)
362 			if (setvec[i] == 1)
363 				*++p = i;
364 		break;
365 	UNARY
366 		cfoll(f,left(v));
367 		break;
368 	case CAT:
369 	case OR:
370 		cfoll(f,left(v));
371 		cfoll(f,right(v));
372 		break;
373 	default:	/* can't happen */
374 		FATAL("can't happen: unknown type %d in cfoll", type(v));
375 	}
376 }
377 
378 int first(Node *p)	/* collects initially active leaves of p into setvec */
379 			/* returns 1 if p matches empty string */
380 {
381 	int b, lp;
382 
383 	switch (type(p)) {
384 	LEAF
385 		lp = info(p);	/* look for high-water mark of subscripts */
386 		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
387 			maxsetvec *= 4;
388 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
389 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
390 			if (setvec == 0 || tmpset == 0)
391 				overflo("out of space in first()");
392 		}
393 		if (setvec[lp] != 1) {
394 			setvec[lp] = 1;
395 			setcnt++;
396 		}
397 		if (type(p) == CCL && (*(char *) right(p)) == '\0')
398 			return(0);		/* empty CCL */
399 		else return(1);
400 	case PLUS:
401 		if (first(left(p)) == 0) return(0);
402 		return(1);
403 	case STAR:
404 	case QUEST:
405 		first(left(p));
406 		return(0);
407 	case CAT:
408 		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
409 		return(1);
410 	case OR:
411 		b = first(right(p));
412 		if (first(left(p)) == 0 || b == 0) return(0);
413 		return(1);
414 	}
415 	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
416 	return(-1);
417 }
418 
419 void follow(Node *v)	/* collects leaves that can follow v into setvec */
420 {
421 	Node *p;
422 
423 	if (type(v) == FINAL)
424 		return;
425 	p = parent(v);
426 	switch (type(p)) {
427 	case STAR:
428 	case PLUS:
429 		first(v);
430 		follow(p);
431 		return;
432 
433 	case OR:
434 	case QUEST:
435 		follow(p);
436 		return;
437 
438 	case CAT:
439 		if (v == left(p)) {	/* v is left child of p */
440 			if (first(right(p)) == 0) {
441 				follow(p);
442 				return;
443 			}
444 		} else		/* v is right child */
445 			follow(p);
446 		return;
447 	}
448 }
449 
450 int member(int c, const char *sarg)	/* is c in s? */
451 {
452 	uschar *s = (uschar *) sarg;
453 
454 	while (*s)
455 		if (c == *s++)
456 			return(1);
457 	return(0);
458 }
459 
460 int match(fa *f, const char *p0)	/* shortest match ? */
461 {
462 	int s, ns;
463 	uschar *p = (uschar *) p0;
464 
465 	s = f->reset ? makeinit(f,0) : f->initstat;
466 	if (f->out[s])
467 		return(1);
468 	do {
469 		assert(*p < NCHARS);
470 		if ((ns = f->gototab[s][*p]) != 0)
471 			s = ns;
472 		else
473 			s = cgoto(f, s, *p);
474 		if (f->out[s])
475 			return(1);
476 	} while (*p++ != 0);
477 	return(0);
478 }
479 
480 int pmatch(fa *f, const char *p0)	/* longest match, for sub */
481 {
482 	int s, ns;
483 	uschar *p = (uschar *) p0;
484 	uschar *q;
485 	int i, k;
486 
487 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
488 	if (f->reset) {
489 		f->initstat = s = makeinit(f,1);
490 	} else {
491 		s = f->initstat;
492 	}
493 	patbeg = (char *) p;
494 	patlen = -1;
495 	do {
496 		q = p;
497 		do {
498 			if (f->out[s])		/* final state */
499 				patlen = q-p;
500 			assert(*q < NCHARS);
501 			if ((ns = f->gototab[s][*q]) != 0)
502 				s = ns;
503 			else
504 				s = cgoto(f, s, *q);
505 			if (s == 1) {	/* no transition */
506 				if (patlen >= 0) {
507 					patbeg = (char *) p;
508 					return(1);
509 				}
510 				else
511 					goto nextin;	/* no match */
512 			}
513 		} while (*q++ != 0);
514 		if (f->out[s])
515 			patlen = q-p-1;	/* don't count $ */
516 		if (patlen >= 0) {
517 			patbeg = (char *) p;
518 			return(1);
519 		}
520 	nextin:
521 		s = 2;
522 		if (f->reset) {
523 			for (i = 2; i <= f->curstat; i++)
524 				xfree(f->posns[i]);
525 			k = *f->posns[0];
526 			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
527 				overflo("out of space in pmatch");
528 			for (i = 0; i <= k; i++)
529 				(f->posns[2])[i] = (f->posns[0])[i];
530 			f->initstat = f->curstat = 2;
531 			f->out[2] = f->out[0];
532 			for (i = 0; i < NCHARS; i++)
533 				f->gototab[2][i] = 0;
534 		}
535 	} while (*p++ != 0);
536 	return (0);
537 }
538 
539 int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
540 {
541 	int s, ns;
542 	uschar *p = (uschar *) p0;
543 	uschar *q;
544 	int i, k;
545 
546 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
547 	if (f->reset) {
548 		f->initstat = s = makeinit(f,1);
549 	} else {
550 		s = f->initstat;
551 	}
552 	patlen = -1;
553 	while (*p) {
554 		q = p;
555 		do {
556 			if (f->out[s])		/* final state */
557 				patlen = q-p;
558 			assert(*q < NCHARS);
559 			if ((ns = f->gototab[s][*q]) != 0)
560 				s = ns;
561 			else
562 				s = cgoto(f, s, *q);
563 			if (s == 1) {	/* no transition */
564 				if (patlen > 0) {
565 					patbeg = (char *) p;
566 					return(1);
567 				} else
568 					goto nnextin;	/* no nonempty match */
569 			}
570 		} while (*q++ != 0);
571 		if (f->out[s])
572 			patlen = q-p-1;	/* don't count $ */
573 		if (patlen > 0 ) {
574 			patbeg = (char *) p;
575 			return(1);
576 		}
577 	nnextin:
578 		s = 2;
579 		if (f->reset) {
580 			for (i = 2; i <= f->curstat; i++)
581 				xfree(f->posns[i]);
582 			k = *f->posns[0];
583 			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
584 				overflo("out of state space");
585 			for (i = 0; i <= k; i++)
586 				(f->posns[2])[i] = (f->posns[0])[i];
587 			f->initstat = f->curstat = 2;
588 			f->out[2] = f->out[0];
589 			for (i = 0; i < NCHARS; i++)
590 				f->gototab[2][i] = 0;
591 		}
592 		p++;
593 	}
594 	return (0);
595 }
596 
597 Node *reparse(const char *p)	/* parses regular expression pointed to by p */
598 {			/* uses relex() to scan regular expression */
599 	Node *np;
600 
601 	dprintf( ("reparse <%s>\n", p) );
602 	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
603 	rtok = relex();
604 	/* GNU compatibility: an empty regexp matches anything */
605 	if (rtok == '\0')
606 		/* FATAL("empty regular expression"); previous */
607 		return(op2(ALL, NIL, NIL));
608 	np = regexp();
609 	if (rtok != '\0')
610 		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
611 	return(np);
612 }
613 
614 Node *regexp(void)	/* top-level parse of reg expr */
615 {
616 	return (alt(concat(primary())));
617 }
618 
619 Node *primary(void)
620 {
621 	Node *np;
622 
623 	switch (rtok) {
624 	case CHAR:
625 		np = op2(CHAR, NIL, itonp(rlxval));
626 		rtok = relex();
627 		return (unary(np));
628 	case ALL:
629 		rtok = relex();
630 		return (unary(op2(ALL, NIL, NIL)));
631 	case DOT:
632 		rtok = relex();
633 		return (unary(op2(DOT, NIL, NIL)));
634 	case CCL:
635 		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
636 		rtok = relex();
637 		return (unary(np));
638 	case NCCL:
639 		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
640 		rtok = relex();
641 		return (unary(np));
642 	case '^':
643 		rtok = relex();
644 		return (unary(op2(CHAR, NIL, itonp(HAT))));
645 	case '$':
646 		rtok = relex();
647 		return (unary(op2(CHAR, NIL, NIL)));
648 	case '(':
649 		rtok = relex();
650 		if (rtok == ')') {	/* special pleading for () */
651 			rtok = relex();
652 			return unary(op2(CCL, NIL, (Node *) tostring("")));
653 		}
654 		np = regexp();
655 		if (rtok == ')') {
656 			rtok = relex();
657 			return (unary(np));
658 		}
659 		else
660 			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
661 	default:
662 		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
663 	}
664 	return 0;	/*NOTREACHED*/
665 }
666 
667 Node *concat(Node *np)
668 {
669 	switch (rtok) {
670 	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
671 		return (concat(op2(CAT, np, primary())));
672 	}
673 	return (np);
674 }
675 
676 Node *alt(Node *np)
677 {
678 	if (rtok == OR) {
679 		rtok = relex();
680 		return (alt(op2(OR, np, concat(primary()))));
681 	}
682 	return (np);
683 }
684 
685 Node *unary(Node *np)
686 {
687 	switch (rtok) {
688 	case STAR:
689 		rtok = relex();
690 		return (unary(op2(STAR, np, NIL)));
691 	case PLUS:
692 		rtok = relex();
693 		return (unary(op2(PLUS, np, NIL)));
694 	case QUEST:
695 		rtok = relex();
696 		return (unary(op2(QUEST, np, NIL)));
697 	default:
698 		return (np);
699 	}
700 }
701 
702 /*
703  * Character class definitions conformant to the POSIX locale as
704  * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
705  * and operating character sets are both ASCII (ISO646) or supersets
706  * thereof.
707  *
708  * Note that to avoid overflowing the temporary buffer used in
709  * relex(), the expanded character class (prior to range expansion)
710  * must be less than twice the size of their full name.
711  */
712 
713 /* Because isblank doesn't show up in any of the header files on any
714  * system i use, it's defined here.  if some other locale has a richer
715  * definition of "blank", define HAS_ISBLANK and provide your own
716  * version.
717  * the parentheses here are an attempt to find a path through the maze
718  * of macro definition and/or function and/or version provided.  thanks
719  * to nelson beebe for the suggestion; let's see if it works everywhere.
720  */
721 
722 #ifndef HAS_ISBLANK
723 
724 int (isblank)(int c)
725 {
726 	return c==' ' || c=='\t';
727 }
728 
729 #endif
730 
731 struct charclass {
732 	const char *cc_name;
733 	int cc_namelen;
734 	int (*cc_func)(int);
735 } charclasses[] = {
736 	{ "alnum",	5,	isalnum },
737 	{ "alpha",	5,	isalpha },
738 	{ "blank",	5,	isblank },
739 	{ "cntrl",	5,	iscntrl },
740 	{ "digit",	5,	isdigit },
741 	{ "graph",	5,	isgraph },
742 	{ "lower",	5,	islower },
743 	{ "print",	5,	isprint },
744 	{ "punct",	5,	ispunct },
745 	{ "space",	5,	isspace },
746 	{ "upper",	5,	isupper },
747 	{ "xdigit",	6,	isxdigit },
748 	{ NULL,		0,	NULL },
749 };
750 
751 
752 int relex(void)		/* lexical analyzer for reparse */
753 {
754 	int c, n;
755 	int cflag;
756 	static uschar *buf = 0;
757 	static int bufsz = 100;
758 	uschar *bp;
759 	struct charclass *cc;
760 	int i;
761 
762 	switch (c = *prestr++) {
763 	case '|': return OR;
764 	case '*': return STAR;
765 	case '+': return PLUS;
766 	case '?': return QUEST;
767 	case '.': return DOT;
768 	case '\0': prestr--; return '\0';
769 	case '^':
770 	case '$':
771 	case '(':
772 	case ')':
773 		return c;
774 	case '\\':
775 		rlxval = quoted((char **) &prestr);
776 		return CHAR;
777 	default:
778 		rlxval = c;
779 		return CHAR;
780 	case '[':
781 		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
782 			FATAL("out of space in reg expr %.10s..", lastre);
783 		bp = buf;
784 		if (*prestr == '^') {
785 			cflag = 1;
786 			prestr++;
787 		}
788 		else
789 			cflag = 0;
790 		n = 2 * strlen((const char *) prestr)+1;
791 		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
792 			FATAL("out of space for reg expr %.10s...", lastre);
793 		for (; ; ) {
794 			if ((c = *prestr++) == '\\') {
795 				*bp++ = '\\';
796 				if ((c = *prestr++) == '\0')
797 					FATAL("nonterminated character class %.20s...", lastre);
798 				*bp++ = c;
799 			/* } else if (c == '\n') { */
800 			/* 	FATAL("newline in character class %.20s...", lastre); */
801 			} else if (c == '[' && *prestr == ':') {
802 				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
803 				for (cc = charclasses; cc->cc_name; cc++)
804 					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
805 						break;
806 				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
807 				    prestr[2 + cc->cc_namelen] == ']') {
808 					prestr += cc->cc_namelen + 3;
809 					for (i = 0; i < NCHARS; i++) {
810 						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0))
811 						    FATAL("out of space for reg expr %.10s...", lastre);
812 						if (cc->cc_func(i)) {
813 							*bp++ = i;
814 							n++;
815 						}
816 					}
817 				} else
818 					*bp++ = c;
819 			} else if (c == '\0') {
820 				FATAL("nonterminated character class %.20s", lastre);
821 			} else if (bp == buf) {	/* 1st char is special */
822 				*bp++ = c;
823 			} else if (c == ']') {
824 				*bp++ = 0;
825 				rlxstr = (uschar *) tostring((char *) buf);
826 				if (cflag == 0)
827 					return CCL;
828 				else
829 					return NCCL;
830 			} else
831 				*bp++ = c;
832 		}
833 	}
834 }
835 
836 int cgoto(fa *f, int s, int c)
837 {
838 	int i, j, k;
839 	int *p, *q;
840 
841 	assert(c == HAT || c < NCHARS);
842 	while (f->accept >= maxsetvec) {	/* guessing here! */
843 		maxsetvec *= 4;
844 		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
845 		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
846 		if (setvec == 0 || tmpset == 0)
847 			overflo("out of space in cgoto()");
848 	}
849 	for (i = 0; i <= f->accept; i++)
850 		setvec[i] = 0;
851 	setcnt = 0;
852 	/* compute positions of gototab[s,c] into setvec */
853 	p = f->posns[s];
854 	for (i = 1; i <= *p; i++) {
855 		if ((k = f->re[p[i]].ltype) != FINAL) {
856 			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
857 			 || (k == DOT && c != 0 && c != HAT)
858 			 || (k == ALL && c != 0)
859 			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
860 			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
861 				q = f->re[p[i]].lfollow;
862 				for (j = 1; j <= *q; j++) {
863 					if (q[j] >= maxsetvec) {
864 						maxsetvec *= 4;
865 						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
866 						tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
867 						if (setvec == 0 || tmpset == 0)
868 							overflo("cgoto overflow");
869 					}
870 					if (setvec[q[j]] == 0) {
871 						setcnt++;
872 						setvec[q[j]] = 1;
873 					}
874 				}
875 			}
876 		}
877 	}
878 	/* determine if setvec is a previous state */
879 	tmpset[0] = setcnt;
880 	j = 1;
881 	for (i = f->accept; i >= 0; i--)
882 		if (setvec[i]) {
883 			tmpset[j++] = i;
884 		}
885 	/* tmpset == previous state? */
886 	for (i = 1; i <= f->curstat; i++) {
887 		p = f->posns[i];
888 		if ((k = tmpset[0]) != p[0])
889 			goto different;
890 		for (j = 1; j <= k; j++)
891 			if (tmpset[j] != p[j])
892 				goto different;
893 		/* setvec is state i */
894 		f->gototab[s][c] = i;
895 		return i;
896 	  different:;
897 	}
898 
899 	/* add tmpset to current set of states */
900 	if (f->curstat >= NSTATES-1) {
901 		f->curstat = 2;
902 		f->reset = 1;
903 		for (i = 2; i < NSTATES; i++)
904 			xfree(f->posns[i]);
905 	} else
906 		++(f->curstat);
907 	for (i = 0; i < NCHARS; i++)
908 		f->gototab[f->curstat][i] = 0;
909 	xfree(f->posns[f->curstat]);
910 	if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
911 		overflo("out of space in cgoto");
912 
913 	f->posns[f->curstat] = p;
914 	f->gototab[s][c] = f->curstat;
915 	for (i = 0; i <= setcnt; i++)
916 		p[i] = tmpset[i];
917 	if (setvec[f->accept])
918 		f->out[f->curstat] = 1;
919 	else
920 		f->out[f->curstat] = 0;
921 	return f->curstat;
922 }
923 
924 
925 void freefa(fa *f)	/* free a finite automaton */
926 {
927 	int i;
928 
929 	if (f == NULL)
930 		return;
931 	for (i = 0; i <= f->curstat; i++)
932 		xfree(f->posns[i]);
933 	for (i = 0; i <= f->accept; i++) {
934 		xfree(f->re[i].lfollow);
935 		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
936 			xfree((f->re[i].lval.np));
937 	}
938 	xfree(f->restr);
939 	xfree(f);
940 }
941