xref: /csrg-svn/lib/libc/regex/regcomp.c (revision 66362)
155847Sbostic /*-
2*66362Sbostic  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
3*66362Sbostic  * Copyright (c) 1992, 1993, 1994
461162Sbostic  *	The Regents of the University of California.  All rights reserved.
555847Sbostic  *
655847Sbostic  * This code is derived from software contributed to Berkeley by
755847Sbostic  * Henry Spencer of the University of Toronto.
855847Sbostic  *
955847Sbostic  * %sccs.include.redist.c%
1055847Sbostic  *
11*66362Sbostic  *	@(#)regcomp.c	8.2 (Berkeley) 03/16/94
1255847Sbostic  */
1355847Sbostic 
1455847Sbostic #if defined(LIBC_SCCS) && !defined(lint)
15*66362Sbostic static char sccsid[] = "@(#)regcomp.c	8.2 (Berkeley) 03/16/94";
1655847Sbostic #endif /* LIBC_SCCS and not lint */
1755847Sbostic 
1855847Sbostic #include <sys/types.h>
1955847Sbostic #include <stdio.h>
2055847Sbostic #include <string.h>
2155847Sbostic #include <ctype.h>
2255847Sbostic #include <limits.h>
2355847Sbostic #include <stdlib.h>
2455847Sbostic #include <regex.h>
2555847Sbostic 
2655847Sbostic #include "utils.h"
2755847Sbostic #include "regex2.h"
2855847Sbostic 
2955847Sbostic #include "cclass.h"
3055847Sbostic #include "cname.h"
3155847Sbostic 
3255847Sbostic /*
3355847Sbostic  * parse structure, passed up and down to avoid global variables and
3455847Sbostic  * other clumsinesses
3555847Sbostic  */
3655847Sbostic struct parse {
3760201Sbostic 	char *next;		/* next character in RE */
3860201Sbostic 	char *end;		/* end of string (-> NUL normally) */
3955847Sbostic 	int error;		/* has an error been seen? */
4055847Sbostic 	sop *strip;		/* malloced strip */
4155847Sbostic 	sopno ssize;		/* malloced strip size (allocated) */
4255847Sbostic 	sopno slen;		/* malloced strip length (used) */
4355847Sbostic 	int ncsalloc;		/* number of csets allocated */
4455847Sbostic 	struct re_guts *g;
4555847Sbostic #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
4655847Sbostic 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
4755847Sbostic 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
4855847Sbostic };
4955847Sbostic 
50*66362Sbostic /* ========= begin header generated by ./mkh ========= */
51*66362Sbostic #ifdef __cplusplus
52*66362Sbostic extern "C" {
53*66362Sbostic #endif
5456355Sbostic 
55*66362Sbostic /* === regcomp.c === */
56*66362Sbostic static void p_ere(register struct parse *p, int stop);
57*66362Sbostic static void p_ere_exp(register struct parse *p);
58*66362Sbostic static void p_str(register struct parse *p);
59*66362Sbostic static void p_bre(register struct parse *p, register int end1, register int end2);
60*66362Sbostic static int p_simp_re(register struct parse *p, int starordinary);
61*66362Sbostic static int p_count(register struct parse *p);
62*66362Sbostic static void p_bracket(register struct parse *p);
63*66362Sbostic static void p_b_term(register struct parse *p, register cset *cs);
64*66362Sbostic static void p_b_cclass(register struct parse *p, register cset *cs);
65*66362Sbostic static void p_b_eclass(register struct parse *p, register cset *cs);
66*66362Sbostic static char p_b_symbol(register struct parse *p);
67*66362Sbostic static char p_b_coll_elem(register struct parse *p, int endc);
68*66362Sbostic static char othercase(int ch);
69*66362Sbostic static void bothcases(register struct parse *p, int ch);
70*66362Sbostic static void ordinary(register struct parse *p, register int ch);
71*66362Sbostic static void nonnewline(register struct parse *p);
72*66362Sbostic static void repeat(register struct parse *p, sopno start, int from, int to);
73*66362Sbostic static int seterr(register struct parse *p, int e);
74*66362Sbostic static cset *allocset(register struct parse *p);
75*66362Sbostic static void freeset(register struct parse *p, register cset *cs);
76*66362Sbostic static int freezeset(register struct parse *p, register cset *cs);
77*66362Sbostic static int firstch(register struct parse *p, register cset *cs);
78*66362Sbostic static int nch(register struct parse *p, register cset *cs);
79*66362Sbostic static void mcadd(register struct parse *p, register cset *cs, register char *cp);
80*66362Sbostic static void mcsub(register cset *cs, register char *cp);
81*66362Sbostic static int mcin(register cset *cs, register char *cp);
82*66362Sbostic static char *mcfind(register cset *cs, register char *cp);
83*66362Sbostic static void mcinvert(register struct parse *p, register cset *cs);
84*66362Sbostic static void mccase(register struct parse *p, register cset *cs);
85*66362Sbostic static int isinsets(register struct re_guts *g, int c);
86*66362Sbostic static int samesets(register struct re_guts *g, int c1, int c2);
87*66362Sbostic static void categorize(struct parse *p, register struct re_guts *g);
88*66362Sbostic static sopno dupl(register struct parse *p, sopno start, sopno finish);
89*66362Sbostic static void doemit(register struct parse *p, sop op, size_t opnd);
90*66362Sbostic static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
91*66362Sbostic static void dofwd(register struct parse *p, sopno pos, sop value);
92*66362Sbostic static void enlarge(register struct parse *p, sopno size);
93*66362Sbostic static void stripsnug(register struct parse *p, register struct re_guts *g);
94*66362Sbostic static void findmust(register struct parse *p, register struct re_guts *g);
95*66362Sbostic static sopno pluscount(register struct parse *p, register struct re_guts *g);
9660201Sbostic 
97*66362Sbostic #ifdef __cplusplus
98*66362Sbostic }
99*66362Sbostic #endif
100*66362Sbostic /* ========= end header generated by ./mkh ========= */
101*66362Sbostic 
10260201Sbostic static char nuls[10];		/* place to point scanner in event of error */
10360201Sbostic 
10455847Sbostic /*
10555847Sbostic  * macros for use with parse structure
10655847Sbostic  * BEWARE:  these know that the parse structure is named `p' !!!
10755847Sbostic  */
10860201Sbostic #define	PEEK()	(*p->next)
10960201Sbostic #define	PEEK2()	(*(p->next+1))
11060201Sbostic #define	MORE()	(p->next < p->end)
11160201Sbostic #define	MORE2()	(p->next+1 < p->end)
11260201Sbostic #define	SEE(c)	(MORE() && PEEK() == (c))
11360201Sbostic #define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
11455847Sbostic #define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
11555847Sbostic #define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
11655847Sbostic #define	NEXT()	(p->next++)
11755847Sbostic #define	NEXT2()	(p->next += 2)
11855847Sbostic #define	NEXTn(n)	(p->next += (n))
11960201Sbostic #define	GETNEXT()	(*p->next++)
12055847Sbostic #define	SETERROR(e)	seterr(p, (e))
12155847Sbostic #define	REQUIRE(co, e)	((co) || SETERROR(e))
12260201Sbostic #define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
12360201Sbostic #define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
12460201Sbostic #define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
125*66362Sbostic #define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
126*66362Sbostic #define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
12760201Sbostic #define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
12860201Sbostic #define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
12955847Sbostic #define	HERE()		(p->slen)
13055847Sbostic #define	THERE()		(p->slen - 1)
13155847Sbostic #define	DROP(n)	(p->slen -= (n))
13255847Sbostic 
13360201Sbostic #ifndef NDEBUG
13460201Sbostic static int never = 0;		/* for use in asserts; shuts lint up */
135*66362Sbostic #else
136*66362Sbostic #define	never	0		/* some <assert.h>s have bugs too */
13760201Sbostic #endif
13856357Sbostic 
13955847Sbostic /*
14055847Sbostic  - regcomp - interface for parser and compilation
141*66362Sbostic  = extern int regcomp(regex_t *, const char *, int);
14260201Sbostic  = #define	REG_BASIC	0000
14360201Sbostic  = #define	REG_EXTENDED	0001
14460201Sbostic  = #define	REG_ICASE	0002
14560201Sbostic  = #define	REG_NOSUB	0004
14660201Sbostic  = #define	REG_NEWLINE	0010
14760201Sbostic  = #define	REG_NOSPEC	0020
14860201Sbostic  = #define	REG_PEND	0040
14960201Sbostic  = #define	REG_DUMP	0200
15055847Sbostic  */
15155847Sbostic int				/* 0 success, otherwise REG_something */
15255847Sbostic regcomp(preg, pattern, cflags)
15355847Sbostic regex_t *preg;
15455847Sbostic const char *pattern;
15555847Sbostic int cflags;
15655847Sbostic {
15755847Sbostic 	struct parse pa;
15855847Sbostic 	register struct re_guts *g;
15955847Sbostic 	register struct parse *p = &pa;
16055847Sbostic 	register int i;
16160201Sbostic 	register size_t len;
162*66362Sbostic #ifdef REDEBUG
163*66362Sbostic #	define	GOODFLAGS(f)	(f)
164*66362Sbostic #else
165*66362Sbostic #	define	GOODFLAGS(f)	((f)&~REG_DUMP)
166*66362Sbostic #endif
16755847Sbostic 
168*66362Sbostic 	cflags = GOODFLAGS(cflags);
16960201Sbostic 	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
17060201Sbostic 		return(REG_INVARG);
17160201Sbostic 
17260201Sbostic 	if (cflags&REG_PEND) {
17360201Sbostic 		if (preg->re_endp < pattern)
17460201Sbostic 			return(REG_INVARG);
17560201Sbostic 		len = preg->re_endp - pattern;
17660201Sbostic 	} else
17760201Sbostic 		len = strlen((char *)pattern);
17860201Sbostic 
17955847Sbostic 	/* do the mallocs early so failure handling is easy */
18060201Sbostic 	g = (struct re_guts *)malloc(sizeof(struct re_guts) +
18160201Sbostic 							(NC-1)*sizeof(cat_t));
18255847Sbostic 	if (g == NULL)
18355847Sbostic 		return(REG_ESPACE);
18460201Sbostic 	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
18555847Sbostic 	p->strip = (sop *)malloc(p->ssize * sizeof(sop));
18655847Sbostic 	p->slen = 0;
18755847Sbostic 	if (p->strip == NULL) {
18855847Sbostic 		free((char *)g);
18955847Sbostic 		return(REG_ESPACE);
19055847Sbostic 	}
19155847Sbostic 
19255847Sbostic 	/* set things up */
19355847Sbostic 	p->g = g;
19460201Sbostic 	p->next = (char *)pattern;	/* convenience; we do not modify it */
19560201Sbostic 	p->end = p->next + len;
19655847Sbostic 	p->error = 0;
19755847Sbostic 	p->ncsalloc = 0;
19855847Sbostic 	for (i = 0; i < NPAREN; i++) {
19955847Sbostic 		p->pbegin[i] = 0;
20055847Sbostic 		p->pend[i] = 0;
20155847Sbostic 	}
20260201Sbostic 	g->csetsize = NC;
20355847Sbostic 	g->sets = NULL;
20455847Sbostic 	g->setbits = NULL;
20555847Sbostic 	g->ncsets = 0;
20655847Sbostic 	g->cflags = cflags;
20755847Sbostic 	g->iflags = 0;
20860201Sbostic 	g->nbol = 0;
20960201Sbostic 	g->neol = 0;
21055847Sbostic 	g->must = NULL;
21155847Sbostic 	g->mlen = 0;
21255847Sbostic 	g->nsub = 0;
21355847Sbostic 	g->ncategories = 1;	/* category 0 is "everything else" */
21460610Sbostic 	g->categories = &g->catspace[-(CHAR_MIN)];
21560201Sbostic 	(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
21655847Sbostic 	g->backrefs = 0;
21755847Sbostic 
21855847Sbostic 	/* do it */
21955847Sbostic 	EMIT(OEND, 0);
22055847Sbostic 	g->firststate = THERE();
22155847Sbostic 	if (cflags&REG_EXTENDED)
22260201Sbostic 		p_ere(p, OUT);
22360201Sbostic 	else if (cflags&REG_NOSPEC)
22460201Sbostic 		p_str(p);
22555847Sbostic 	else
22660201Sbostic 		p_bre(p, OUT, OUT);
22755847Sbostic 	EMIT(OEND, 0);
22855847Sbostic 	g->laststate = THERE();
22955847Sbostic 
23055847Sbostic 	/* tidy up loose ends and fill things in */
23155847Sbostic 	categorize(p, g);
23255847Sbostic 	stripsnug(p, g);
23355847Sbostic 	findmust(p, g);
23455847Sbostic 	g->nplus = pluscount(p, g);
23555847Sbostic 	g->magic = MAGIC2;
23655847Sbostic 	preg->re_nsub = g->nsub;
23755847Sbostic 	preg->re_g = g;
23855847Sbostic 	preg->re_magic = MAGIC1;
23956355Sbostic #ifndef REDEBUG
24055847Sbostic 	/* not debugging, so can't rely on the assert() in regexec() */
24155847Sbostic 	if (g->iflags&BAD)
24255847Sbostic 		SETERROR(REG_ASSERT);
24355847Sbostic #endif
24455847Sbostic 
24555847Sbostic 	/* win or lose, we're done */
24655847Sbostic 	if (p->error != 0)	/* lose */
24755847Sbostic 		regfree(preg);
24855847Sbostic 	return(p->error);
24955847Sbostic }
25055847Sbostic 
25155847Sbostic /*
25255847Sbostic  - p_ere - ERE parser top level, concatenation and alternation
25360201Sbostic  == static void p_ere(register struct parse *p, int stop);
25455847Sbostic  */
25555847Sbostic static void
25655847Sbostic p_ere(p, stop)
25755847Sbostic register struct parse *p;
25860201Sbostic int stop;			/* character this ERE should end at */
25955847Sbostic {
26060201Sbostic 	register char c;
26155847Sbostic 	register sopno prevback;
26255847Sbostic 	register sopno prevfwd;
26355847Sbostic 	register sopno conc;
26455847Sbostic 	register int first = 1;		/* is this the first alternative? */
26555847Sbostic 
26655847Sbostic 	for (;;) {
26755847Sbostic 		/* do a bunch of concatenated expressions */
26855847Sbostic 		conc = HERE();
26960201Sbostic 		while (MORE() && (c = PEEK()) != '|' && c != stop)
27055847Sbostic 			p_ere_exp(p);
27155847Sbostic 		REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
27255847Sbostic 
27355847Sbostic 		if (!EAT('|'))
27455847Sbostic 			break;		/* NOTE BREAK OUT */
27555847Sbostic 
27655847Sbostic 		if (first) {
27755847Sbostic 			INSERT(OCH_, conc);	/* offset is wrong */
27855847Sbostic 			prevfwd = conc;
27955847Sbostic 			prevback = conc;
28055847Sbostic 			first = 0;
28155847Sbostic 		}
28260201Sbostic 		ASTERN(OOR1, prevback);
28355847Sbostic 		prevback = THERE();
28460201Sbostic 		AHEAD(prevfwd);			/* fix previous offset */
28555847Sbostic 		prevfwd = HERE();
28655847Sbostic 		EMIT(OOR2, 0);			/* offset is very wrong */
28755847Sbostic 	}
28855847Sbostic 
28955847Sbostic 	if (!first) {		/* tail-end fixups */
29060201Sbostic 		AHEAD(prevfwd);
29160201Sbostic 		ASTERN(O_CH, prevback);
29255847Sbostic 	}
29355847Sbostic 
29460201Sbostic 	assert(!MORE() || SEE(stop));
29555847Sbostic }
29655847Sbostic 
29755847Sbostic /*
29855847Sbostic  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
29960201Sbostic  == static void p_ere_exp(register struct parse *p);
30055847Sbostic  */
30155847Sbostic static void
30255847Sbostic p_ere_exp(p)
30355847Sbostic register struct parse *p;
30455847Sbostic {
30560201Sbostic 	register char c;
30655847Sbostic 	register sopno pos;
30755847Sbostic 	register int count;
30855847Sbostic 	register int count2;
30955847Sbostic 	register sopno subno;
31055847Sbostic 	int wascaret = 0;
31155847Sbostic 
31260201Sbostic 	assert(MORE());		/* caller should have ensured this */
31355847Sbostic 	c = GETNEXT();
31455847Sbostic 
31555847Sbostic 	pos = HERE();
31655847Sbostic 	switch (c) {
31755847Sbostic 	case '(':
31860201Sbostic 		REQUIRE(MORE(), REG_EPAREN);
31955847Sbostic 		p->g->nsub++;
32055847Sbostic 		subno = p->g->nsub;
32155847Sbostic 		if (subno < NPAREN)
32255847Sbostic 			p->pbegin[subno] = HERE();
32355847Sbostic 		EMIT(OLPAREN, subno);
32455847Sbostic 		if (!SEE(')'))
32555847Sbostic 			p_ere(p, ')');
32655847Sbostic 		if (subno < NPAREN) {
32755847Sbostic 			p->pend[subno] = HERE();
32855847Sbostic 			assert(p->pend[subno] != 0);
32955847Sbostic 		}
33055847Sbostic 		EMIT(ORPAREN, subno);
33155847Sbostic 		MUSTEAT(')', REG_EPAREN);
33255847Sbostic 		break;
33355847Sbostic #ifndef POSIX_MISTAKE
33455847Sbostic 	case ')':		/* happens only if no current unmatched ( */
33555847Sbostic 		/*
33655847Sbostic 		 * You may ask, why the ifndef?  Because I didn't notice
33755847Sbostic 		 * this until slightly too late for 1003.2, and none of the
33855847Sbostic 		 * other 1003.2 regular-expression reviewers noticed it at
33955847Sbostic 		 * all.  So an unmatched ) is legal POSIX, at least until
34055847Sbostic 		 * we can get it fixed.
34155847Sbostic 		 */
34255847Sbostic 		SETERROR(REG_EPAREN);
34355847Sbostic 		break;
34455847Sbostic #endif
34555847Sbostic 	case '^':
34655847Sbostic 		EMIT(OBOL, 0);
34755847Sbostic 		p->g->iflags |= USEBOL;
34860201Sbostic 		p->g->nbol++;
34955847Sbostic 		wascaret = 1;
35055847Sbostic 		break;
35155847Sbostic 	case '$':
35255847Sbostic 		EMIT(OEOL, 0);
35355847Sbostic 		p->g->iflags |= USEEOL;
35460201Sbostic 		p->g->neol++;
35555847Sbostic 		break;
35655847Sbostic 	case '|':
35755847Sbostic 		SETERROR(REG_EMPTY);
35855847Sbostic 		break;
35955847Sbostic 	case '*':
36055847Sbostic 	case '+':
36155847Sbostic 	case '?':
36255847Sbostic 		SETERROR(REG_BADRPT);
36355847Sbostic 		break;
36455847Sbostic 	case '.':
36555847Sbostic 		if (p->g->cflags&REG_NEWLINE)
36655847Sbostic 			nonnewline(p);
36755847Sbostic 		else
36855847Sbostic 			EMIT(OANY, 0);
36955847Sbostic 		break;
37055847Sbostic 	case '[':
37155847Sbostic 		p_bracket(p);
37255847Sbostic 		break;
37355847Sbostic 	case '\\':
37460201Sbostic 		REQUIRE(MORE(), REG_EESCAPE);
37555847Sbostic 		c = GETNEXT();
37660201Sbostic 		ordinary(p, c);
37755847Sbostic 		break;
37855847Sbostic 	case '{':		/* okay as ordinary except if digit follows */
37960201Sbostic 		REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
38055847Sbostic 		/* FALLTHROUGH */
38155847Sbostic 	default:
38255847Sbostic 		ordinary(p, c);
38355847Sbostic 		break;
38455847Sbostic 	}
38555847Sbostic 
38660201Sbostic 	if (!MORE())
38760201Sbostic 		return;
38855847Sbostic 	c = PEEK();
38960201Sbostic 	/* we call { a repetition if followed by a digit */
39060201Sbostic 	if (!( c == '*' || c == '+' || c == '?' ||
39160201Sbostic 				(c == '{' && MORE2() && isdigit(PEEK2())) ))
39255847Sbostic 		return;		/* no repetition, we're done */
39355847Sbostic 	NEXT();
39455847Sbostic 
39555847Sbostic 	REQUIRE(!wascaret, REG_BADRPT);
39655847Sbostic 	switch (c) {
39755847Sbostic 	case '*':	/* implemented as +? */
39855847Sbostic 		INSERT(OPLUS_, pos);
39960201Sbostic 		ASTERN(O_PLUS, pos);
40055847Sbostic 		INSERT(OQUEST_, pos);
40160201Sbostic 		ASTERN(O_QUEST, pos);
40255847Sbostic 		break;
40355847Sbostic 	case '+':
40455847Sbostic 		INSERT(OPLUS_, pos);
40560201Sbostic 		ASTERN(O_PLUS, pos);
40655847Sbostic 		break;
40755847Sbostic 	case '?':
40855847Sbostic 		INSERT(OQUEST_, pos);
40960201Sbostic 		ASTERN(O_QUEST, pos);
41055847Sbostic 		break;
41155847Sbostic 	case '{':
41255847Sbostic 		count = p_count(p);
41355847Sbostic 		if (EAT(',')) {
41455847Sbostic 			if (isdigit(PEEK())) {
41555847Sbostic 				count2 = p_count(p);
41655847Sbostic 				REQUIRE(count <= count2, REG_BADBR);
41755847Sbostic 			} else		/* single number with comma */
41855847Sbostic 				count2 = INFINITY;
41955847Sbostic 		} else		/* just a single number */
42055847Sbostic 			count2 = count;
42155847Sbostic 		repeat(p, pos, count, count2);
42255847Sbostic 		if (!EAT('}')) {	/* error heuristics */
42360201Sbostic 			while (MORE() && PEEK() != '}')
42455847Sbostic 				NEXT();
42560201Sbostic 			REQUIRE(MORE(), REG_EBRACE);
42660201Sbostic 			SETERROR(REG_BADBR);
42755847Sbostic 		}
42855847Sbostic 		break;
42955847Sbostic 	}
43055847Sbostic 
43160201Sbostic 	if (!MORE())
43260201Sbostic 		return;
43355847Sbostic 	c = PEEK();
43460201Sbostic 	if (!( c == '*' || c == '+' || c == '?' ||
43560201Sbostic 				(c == '{' && MORE2() && isdigit(PEEK2())) ) )
43660201Sbostic 		return;
43760201Sbostic 	SETERROR(REG_BADRPT);
43855847Sbostic }
43955847Sbostic 
44055847Sbostic /*
44160201Sbostic  - p_str - string (no metacharacters) "parser"
44260201Sbostic  == static void p_str(register struct parse *p);
44360201Sbostic  */
44460201Sbostic static void
44560201Sbostic p_str(p)
44660201Sbostic register struct parse *p;
44760201Sbostic {
44860201Sbostic 	REQUIRE(MORE(), REG_EMPTY);
44960201Sbostic 	while (MORE())
45060201Sbostic 		ordinary(p, GETNEXT());
45160201Sbostic }
45260201Sbostic 
45360201Sbostic /*
45455847Sbostic  - p_bre - BRE parser top level, anchoring and concatenation
45560201Sbostic  == static void p_bre(register struct parse *p, register int end1, \
45660201Sbostic  ==	register int end2);
45760201Sbostic  * Giving end1 as OUT essentially eliminates the end1/end2 check.
45855847Sbostic  *
45956355Sbostic  * This implementation is a bit of a kludge, in that a trailing $ is first
46056355Sbostic  * taken as an ordinary character and then revised to be an anchor.  The
46156355Sbostic  * only undesirable side effect is that '$' gets included as a character
46256355Sbostic  * category in such cases.  This is fairly harmless; not worth fixing.
46360201Sbostic  * The amount of lookahead needed to avoid this kludge is excessive.
46455847Sbostic  */
46555847Sbostic static void
46655847Sbostic p_bre(p, end1, end2)
46755847Sbostic register struct parse *p;
46860201Sbostic register int end1;		/* first terminating character */
46960201Sbostic register int end2;		/* second terminating character */
47055847Sbostic {
47155847Sbostic 	register sopno start = HERE();
47255847Sbostic 	register int first = 1;			/* first subexpression? */
47356355Sbostic 	register int wasdollar = 0;
47455847Sbostic 
47555847Sbostic 	if (EAT('^')) {
47655847Sbostic 		EMIT(OBOL, 0);
47755847Sbostic 		p->g->iflags |= USEBOL;
47860201Sbostic 		p->g->nbol++;
47955847Sbostic 	}
48060201Sbostic 	while (MORE() && !SEETWO(end1, end2)) {
48155847Sbostic 		wasdollar = p_simp_re(p, first);
48255847Sbostic 		first = 0;
48355847Sbostic 	}
48455847Sbostic 	if (wasdollar) {	/* oops, that was a trailing anchor */
48555847Sbostic 		DROP(1);
48655847Sbostic 		EMIT(OEOL, 0);
48755847Sbostic 		p->g->iflags |= USEEOL;
48860201Sbostic 		p->g->neol++;
48955847Sbostic 	}
49055847Sbostic 
49155847Sbostic 	REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
49255847Sbostic }
49355847Sbostic 
49455847Sbostic /*
49555847Sbostic  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
49660201Sbostic  == static int p_simp_re(register struct parse *p, int starordinary);
49755847Sbostic  */
49855847Sbostic static int			/* was the simple RE an unbackslashed $? */
49955847Sbostic p_simp_re(p, starordinary)
50055847Sbostic register struct parse *p;
50155847Sbostic int starordinary;		/* is a leading * an ordinary character? */
50255847Sbostic {
50355847Sbostic 	register int c;
50455847Sbostic 	register int count;
50555847Sbostic 	register int count2;
50655847Sbostic 	register sopno pos;
50755847Sbostic 	register int i;
50855847Sbostic 	register sopno subno;
50955847Sbostic #	define	BACKSL	(1<<CHAR_BIT)
51055847Sbostic 
51155847Sbostic 	pos = HERE();		/* repetion op, if any, covers from here */
51255847Sbostic 
51360201Sbostic 	assert(MORE());		/* caller should have ensured this */
51455847Sbostic 	c = GETNEXT();
51560201Sbostic 	if (c == '\\') {
51660201Sbostic 		REQUIRE(MORE(), REG_EESCAPE);
51760201Sbostic 		c = BACKSL | (unsigned char)GETNEXT();
51860201Sbostic 	}
51955847Sbostic 	switch (c) {
52055847Sbostic 	case '.':
52155847Sbostic 		if (p->g->cflags&REG_NEWLINE)
52255847Sbostic 			nonnewline(p);
52355847Sbostic 		else
52455847Sbostic 			EMIT(OANY, 0);
52555847Sbostic 		break;
52655847Sbostic 	case '[':
52755847Sbostic 		p_bracket(p);
52855847Sbostic 		break;
52955847Sbostic 	case BACKSL|'{':
53055847Sbostic 		SETERROR(REG_BADRPT);
53155847Sbostic 		break;
53255847Sbostic 	case BACKSL|'(':
53355847Sbostic 		p->g->nsub++;
53455847Sbostic 		subno = p->g->nsub;
53555847Sbostic 		if (subno < NPAREN)
53655847Sbostic 			p->pbegin[subno] = HERE();
53755847Sbostic 		EMIT(OLPAREN, subno);
53860201Sbostic 		/* the MORE here is an error heuristic */
53960201Sbostic 		if (MORE() && !SEETWO('\\', ')'))
54055847Sbostic 			p_bre(p, '\\', ')');
54155847Sbostic 		if (subno < NPAREN) {
54255847Sbostic 			p->pend[subno] = HERE();
54355847Sbostic 			assert(p->pend[subno] != 0);
54455847Sbostic 		}
54555847Sbostic 		EMIT(ORPAREN, subno);
54655847Sbostic 		REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
54755847Sbostic 		break;
54855847Sbostic 	case BACKSL|')':	/* should not get here -- must be user */
54955847Sbostic 	case BACKSL|'}':
55055847Sbostic 		SETERROR(REG_EPAREN);
55155847Sbostic 		break;
55255847Sbostic 	case BACKSL|'1':
55355847Sbostic 	case BACKSL|'2':
55455847Sbostic 	case BACKSL|'3':
55555847Sbostic 	case BACKSL|'4':
55655847Sbostic 	case BACKSL|'5':
55755847Sbostic 	case BACKSL|'6':
55855847Sbostic 	case BACKSL|'7':
55955847Sbostic 	case BACKSL|'8':
56055847Sbostic 	case BACKSL|'9':
56155847Sbostic 		i = (c&~BACKSL) - '0';
56255847Sbostic 		assert(i < NPAREN);
56355847Sbostic 		if (p->pend[i] != 0) {
56455847Sbostic 			assert(i <= p->g->nsub);
56555847Sbostic 			EMIT(OBACK_, i);
56655847Sbostic 			assert(p->pbegin[i] != 0);
56755847Sbostic 			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
56855847Sbostic 			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
56955847Sbostic 			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
57055847Sbostic 			EMIT(O_BACK, i);
57155847Sbostic 		} else
57255847Sbostic 			SETERROR(REG_ESUBREG);
57355847Sbostic 		p->g->backrefs = 1;
57455847Sbostic 		break;
57555847Sbostic 	case '*':
57655847Sbostic 		REQUIRE(starordinary, REG_BADRPT);
57755847Sbostic 		/* FALLTHROUGH */
57855847Sbostic 	default:
57960201Sbostic 		ordinary(p, c &~ BACKSL);
58055847Sbostic 		break;
58155847Sbostic 	}
58255847Sbostic 
58355847Sbostic 	if (EAT('*')) {		/* implemented as +? */
58455847Sbostic 		INSERT(OPLUS_, pos);
58560201Sbostic 		ASTERN(O_PLUS, pos);
58655847Sbostic 		INSERT(OQUEST_, pos);
58760201Sbostic 		ASTERN(O_QUEST, pos);
58855847Sbostic 	} else if (EATTWO('\\', '{')) {
58955847Sbostic 		count = p_count(p);
59055847Sbostic 		if (EAT(',')) {
59160201Sbostic 			if (MORE() && isdigit(PEEK())) {
59255847Sbostic 				count2 = p_count(p);
59355847Sbostic 				REQUIRE(count <= count2, REG_BADBR);
59455847Sbostic 			} else		/* single number with comma */
59555847Sbostic 				count2 = INFINITY;
59655847Sbostic 		} else		/* just a single number */
59755847Sbostic 			count2 = count;
59855847Sbostic 		repeat(p, pos, count, count2);
59955847Sbostic 		if (!EATTWO('\\', '}')) {	/* error heuristics */
60060201Sbostic 			while (MORE() && !SEETWO('\\', '}'))
60155847Sbostic 				NEXT();
60260201Sbostic 			REQUIRE(MORE(), REG_EBRACE);
60360201Sbostic 			SETERROR(REG_BADBR);
60455847Sbostic 		}
60560201Sbostic 	} else if (c == (unsigned char)'$')	/* $ (but not \$) ends it */
60655847Sbostic 		return(1);
60755847Sbostic 
60855847Sbostic 	return(0);
60955847Sbostic }
61055847Sbostic 
61155847Sbostic /*
61255847Sbostic  - p_count - parse a repetition count
61360201Sbostic  == static int p_count(register struct parse *p);
61455847Sbostic  */
61555847Sbostic static int			/* the value */
61655847Sbostic p_count(p)
61755847Sbostic register struct parse *p;
61855847Sbostic {
61955847Sbostic 	register int count = 0;
62055847Sbostic 	register int ndigits = 0;
62155847Sbostic 
62260201Sbostic 	while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
62355847Sbostic 		count = count*10 + (GETNEXT() - '0');
62455847Sbostic 		ndigits++;
62555847Sbostic 	}
62655847Sbostic 
62755847Sbostic 	REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
62855847Sbostic 	return(count);
62955847Sbostic }
63055847Sbostic 
63155847Sbostic /*
63255847Sbostic  - p_bracket - parse a bracketed character list
63360201Sbostic  == static void p_bracket(register struct parse *p);
63455847Sbostic  *
63555847Sbostic  * Note a significant property of this code:  if the allocset() did SETERROR,
63655847Sbostic  * no set operations are done.
63755847Sbostic  */
63855847Sbostic static void
63955847Sbostic p_bracket(p)
64055847Sbostic register struct parse *p;
64155847Sbostic {
64260201Sbostic 	register char c;
64355847Sbostic 	register cset *cs = allocset(p);
64455847Sbostic 	register int invert = 0;
64555847Sbostic 
64660201Sbostic 	/* Dept of Truly Sickening Special-Case Kludges */
64760201Sbostic 	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
64860201Sbostic 		EMIT(OBOW, 0);
64960201Sbostic 		NEXTn(6);
65060201Sbostic 		return;
65160201Sbostic 	}
65260201Sbostic 	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
65360201Sbostic 		EMIT(OEOW, 0);
65460201Sbostic 		NEXTn(6);
65560201Sbostic 		return;
65660201Sbostic 	}
65760201Sbostic 
65855847Sbostic 	if (EAT('^'))
65955847Sbostic 		invert++;	/* make note to invert set at end */
66055847Sbostic 	if (EAT(']'))
66155847Sbostic 		CHadd(cs, ']');
66256618Sbostic 	else if (EAT('-'))
66356618Sbostic 		CHadd(cs, '-');
66460201Sbostic 	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
66555847Sbostic 		p_b_term(p, cs);
66655847Sbostic 	if (EAT('-'))
66755847Sbostic 		CHadd(cs, '-');
66855847Sbostic 	MUSTEAT(']', REG_EBRACK);
66955847Sbostic 
67060201Sbostic 	if (p->error != 0)	/* don't mess things up further */
67160201Sbostic 		return;
67260201Sbostic 
67360201Sbostic 	if (p->g->cflags&REG_ICASE) {
67455847Sbostic 		register int i;
67560201Sbostic 		register int ci;
67655847Sbostic 
67755847Sbostic 		for (i = p->g->csetsize - 1; i >= 0; i--)
67860201Sbostic 			if (CHIN(cs, i) && isalpha(i)) {
67960201Sbostic 				ci = othercase(i);
68060201Sbostic 				if (ci != i)
68160201Sbostic 					CHadd(cs, ci);
68260201Sbostic 			}
68360201Sbostic 		if (cs->multis != NULL)
68460201Sbostic 			mccase(p, cs);
68560201Sbostic 	}
68660201Sbostic 	if (invert) {
68760201Sbostic 		register int i;
68860201Sbostic 
68960201Sbostic 		for (i = p->g->csetsize - 1; i >= 0; i--)
69055847Sbostic 			if (CHIN(cs, i))
69155847Sbostic 				CHsub(cs, i);
69255847Sbostic 			else
69355847Sbostic 				CHadd(cs, i);
69455847Sbostic 		if (p->g->cflags&REG_NEWLINE)
69555847Sbostic 			CHsub(cs, '\n');
69655847Sbostic 		if (cs->multis != NULL)
69755847Sbostic 			mcinvert(p, cs);
69855847Sbostic 	}
69960201Sbostic 
70055847Sbostic 	assert(cs->multis == NULL);		/* xxx */
70160201Sbostic 
70260201Sbostic 	if (nch(p, cs) == 1) {		/* optimize singleton sets */
70360201Sbostic 		ordinary(p, firstch(p, cs));
70460201Sbostic 		freeset(p, cs);
70560201Sbostic 	} else
70660201Sbostic 		EMIT(OANYOF, freezeset(p, cs));
70755847Sbostic }
70855847Sbostic 
70955847Sbostic /*
71055847Sbostic  - p_b_term - parse one term of a bracketed character list
71160201Sbostic  == static void p_b_term(register struct parse *p, register cset *cs);
71255847Sbostic  */
71355847Sbostic static void
71455847Sbostic p_b_term(p, cs)
71555847Sbostic register struct parse *p;
71655847Sbostic register cset *cs;
71755847Sbostic {
71860201Sbostic 	register char c;
71960201Sbostic 	register char start, finish;
72055847Sbostic 	register int i;
72155847Sbostic 
72255847Sbostic 	/* classify what we've got */
72360201Sbostic 	switch ((MORE()) ? PEEK() : '\0') {
72455847Sbostic 	case '[':
72560201Sbostic 		c = (MORE2()) ? PEEK2() : '\0';
72655847Sbostic 		break;
72755847Sbostic 	case '-':
72855847Sbostic 		SETERROR(REG_ERANGE);
72955847Sbostic 		return;			/* NOTE RETURN */
73055847Sbostic 		break;
73155847Sbostic 	default:
73255847Sbostic 		c = '\0';
73355847Sbostic 		break;
73455847Sbostic 	}
73555847Sbostic 
73655847Sbostic 	switch (c) {
73755847Sbostic 	case ':':		/* character class */
73855847Sbostic 		NEXT2();
73960201Sbostic 		REQUIRE(MORE(), REG_EBRACK);
74055847Sbostic 		c = PEEK();
74155847Sbostic 		REQUIRE(c != '-' && c != ']', REG_ECTYPE);
74255847Sbostic 		p_b_cclass(p, cs);
74360201Sbostic 		REQUIRE(MORE(), REG_EBRACK);
74455847Sbostic 		REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
74555847Sbostic 		break;
74655847Sbostic 	case '=':		/* equivalence class */
74755847Sbostic 		NEXT2();
74860201Sbostic 		REQUIRE(MORE(), REG_EBRACK);
74955847Sbostic 		c = PEEK();
75055847Sbostic 		REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
75155847Sbostic 		p_b_eclass(p, cs);
75260201Sbostic 		REQUIRE(MORE(), REG_EBRACK);
75355847Sbostic 		REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
75455847Sbostic 		break;
75555847Sbostic 	default:		/* symbol, ordinary character, or range */
75655847Sbostic /* xxx revision needed for multichar stuff */
75755847Sbostic 		start = p_b_symbol(p);
75860201Sbostic 		if (SEE('-') && MORE2() && PEEK2() != ']') {
75955847Sbostic 			/* range */
76055847Sbostic 			NEXT();
76155847Sbostic 			if (EAT('-'))
76255847Sbostic 				finish = '-';
76355847Sbostic 			else
76455847Sbostic 				finish = p_b_symbol(p);
76555847Sbostic 		} else
76655847Sbostic 			finish = start;
76760201Sbostic /* xxx what about signed chars here... */
76855847Sbostic 		REQUIRE(start <= finish, REG_ERANGE);
76960201Sbostic 		for (i = start; i <= finish; i++)
77055847Sbostic 			CHadd(cs, i);
77155847Sbostic 		break;
77255847Sbostic 	}
77355847Sbostic }
77455847Sbostic 
77555847Sbostic /*
77655847Sbostic  - p_b_cclass - parse a character-class name and deal with it
77760201Sbostic  == static void p_b_cclass(register struct parse *p, register cset *cs);
77855847Sbostic  */
77955847Sbostic static void
78055847Sbostic p_b_cclass(p, cs)
78155847Sbostic register struct parse *p;
78255847Sbostic register cset *cs;
78355847Sbostic {
78460201Sbostic 	register char *sp = p->next;
78555847Sbostic 	register struct cclass *cp;
78660201Sbostic 	register size_t len;
78760201Sbostic 	register char *u;
78860201Sbostic 	register char c;
78955847Sbostic 
79060201Sbostic 	while (MORE() && isalpha(PEEK()))
79160201Sbostic 		NEXT();
79260201Sbostic 	len = p->next - sp;
79355847Sbostic 	for (cp = cclasses; cp->name != NULL; cp++)
79460201Sbostic 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
79555847Sbostic 			break;
79655847Sbostic 	if (cp->name == NULL) {
79755847Sbostic 		/* oops, didn't find it */
79855847Sbostic 		SETERROR(REG_ECTYPE);
79955847Sbostic 		return;
80055847Sbostic 	}
80155847Sbostic 
80260201Sbostic 	u = cp->chars;
80355847Sbostic 	while ((c = *u++) != '\0')
80455847Sbostic 		CHadd(cs, c);
80560201Sbostic 	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
806*66362Sbostic 		MCadd(p, cs, u);
80755847Sbostic }
80855847Sbostic 
80955847Sbostic /*
81055847Sbostic  - p_b_eclass - parse an equivalence-class name and deal with it
81160201Sbostic  == static void p_b_eclass(register struct parse *p, register cset *cs);
81255847Sbostic  *
81355847Sbostic  * This implementation is incomplete. xxx
81455847Sbostic  */
81555847Sbostic static void
81655847Sbostic p_b_eclass(p, cs)
81755847Sbostic register struct parse *p;
81855847Sbostic register cset *cs;
81955847Sbostic {
82060201Sbostic 	register char c;
82155847Sbostic 
82255847Sbostic 	c = p_b_coll_elem(p, '=');
82355847Sbostic 	CHadd(cs, c);
82455847Sbostic }
82555847Sbostic 
82655847Sbostic /*
82755847Sbostic  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
82860201Sbostic  == static char p_b_symbol(register struct parse *p);
82955847Sbostic  */
83060201Sbostic static char			/* value of symbol */
83155847Sbostic p_b_symbol(p)
83255847Sbostic register struct parse *p;
83355847Sbostic {
83460201Sbostic 	register char value;
83555847Sbostic 
83660201Sbostic 	REQUIRE(MORE(), REG_EBRACK);
83760201Sbostic 	if (!EATTWO('[', '.'))
83855847Sbostic 		return(GETNEXT());
83955847Sbostic 
84055847Sbostic 	/* collating symbol */
84155847Sbostic 	value = p_b_coll_elem(p, '.');
84255847Sbostic 	REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
84355847Sbostic 	return(value);
84455847Sbostic }
84555847Sbostic 
84655847Sbostic /*
84755847Sbostic  - p_b_coll_elem - parse a collating-element name and look it up
84860201Sbostic  == static char p_b_coll_elem(register struct parse *p, int endc);
84955847Sbostic  */
85060201Sbostic static char			/* value of collating element */
85155847Sbostic p_b_coll_elem(p, endc)
85255847Sbostic register struct parse *p;
85360201Sbostic int endc;			/* name ended by endc,']' */
85455847Sbostic {
85560201Sbostic 	register char *sp = p->next;
85655847Sbostic 	register struct cname *cp;
85755847Sbostic 	register int len;
85860201Sbostic 	register char c;
85955847Sbostic 
86060201Sbostic 	while (MORE() && !SEETWO(endc, ']'))
86155847Sbostic 		NEXT();
86260201Sbostic 	if (!MORE()) {
86355847Sbostic 		SETERROR(REG_EBRACK);
86455847Sbostic 		return(0);
86555847Sbostic 	}
86655847Sbostic 	len = p->next - sp;
86755847Sbostic 	for (cp = cnames; cp->name != NULL; cp++)
86860201Sbostic 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
86955847Sbostic 			return(cp->code);	/* known name */
87055847Sbostic 	if (len == 1)
87155847Sbostic 		return(*sp);	/* single character */
87255847Sbostic 	SETERROR(REG_ECOLLATE);			/* neither */
87355847Sbostic 	return(0);
87455847Sbostic }
87555847Sbostic 
87655847Sbostic /*
87755847Sbostic  - othercase - return the case counterpart of an alphabetic
87860201Sbostic  == static char othercase(int ch);
87955847Sbostic  */
88060201Sbostic static char			/* if no counterpart, return ch */
88155847Sbostic othercase(ch)
88260201Sbostic int ch;
88355847Sbostic {
88455847Sbostic 	assert(isalpha(ch));
88555847Sbostic 	if (isupper(ch))
88655847Sbostic 		return(tolower(ch));
88755847Sbostic 	else if (islower(ch))
88855847Sbostic 		return(toupper(ch));
88955847Sbostic 	else			/* peculiar, but could happen */
89055847Sbostic 		return(ch);
89155847Sbostic }
89255847Sbostic 
89355847Sbostic /*
89460201Sbostic  - bothcases - emit a dualcase version of a two-case character
89560201Sbostic  == static void bothcases(register struct parse *p, int ch);
89656355Sbostic  *
89755847Sbostic  * Boy, is this implementation ever a kludge...
89855847Sbostic  */
89955847Sbostic static void
90055847Sbostic bothcases(p, ch)
90155847Sbostic register struct parse *p;
90260201Sbostic int ch;
90355847Sbostic {
90460201Sbostic 	register char *oldnext = p->next;
90560201Sbostic 	register char *oldend = p->end;
90660201Sbostic 	char bracket[3];
90755847Sbostic 
90860201Sbostic 	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
90955847Sbostic 	p->next = bracket;
91060201Sbostic 	p->end = bracket+2;
91155847Sbostic 	bracket[0] = ch;
91255847Sbostic 	bracket[1] = ']';
91355847Sbostic 	bracket[2] = '\0';
91455847Sbostic 	p_bracket(p);
91555847Sbostic 	assert(p->next == bracket+2);
91655847Sbostic 	p->next = oldnext;
91760201Sbostic 	p->end = oldend;
91855847Sbostic }
91955847Sbostic 
92055847Sbostic /*
92155847Sbostic  - ordinary - emit an ordinary character
92260201Sbostic  == static void ordinary(register struct parse *p, register int ch);
92355847Sbostic  */
92455847Sbostic static void
92555847Sbostic ordinary(p, ch)
92655847Sbostic register struct parse *p;
92760201Sbostic register int ch;
92855847Sbostic {
92960201Sbostic 	register cat_t *cap = p->g->categories;
93055847Sbostic 
93160201Sbostic 	if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
93255847Sbostic 		bothcases(p, ch);
93360201Sbostic 	else {
93460201Sbostic 		EMIT(OCHAR, (unsigned char)ch);
93560201Sbostic 		if (cap[ch] == 0)
93660201Sbostic 			cap[ch] = p->g->ncategories++;
93755847Sbostic 	}
93855847Sbostic }
93955847Sbostic 
94055847Sbostic /*
94155847Sbostic  - nonnewline - emit REG_NEWLINE version of OANY
94260201Sbostic  == static void nonnewline(register struct parse *p);
94356355Sbostic  *
94455847Sbostic  * Boy, is this implementation ever a kludge...
94555847Sbostic  */
94655847Sbostic static void
94755847Sbostic nonnewline(p)
94855847Sbostic register struct parse *p;
94955847Sbostic {
95060201Sbostic 	register char *oldnext = p->next;
95160201Sbostic 	register char *oldend = p->end;
95260201Sbostic 	char bracket[4];
95355847Sbostic 
95455847Sbostic 	p->next = bracket;
95560201Sbostic 	p->end = bracket+3;
95655847Sbostic 	bracket[0] = '^';
95755847Sbostic 	bracket[1] = '\n';
95855847Sbostic 	bracket[2] = ']';
95955847Sbostic 	bracket[3] = '\0';
96055847Sbostic 	p_bracket(p);
96155847Sbostic 	assert(p->next == bracket+3);
96255847Sbostic 	p->next = oldnext;
96360201Sbostic 	p->end = oldend;
96455847Sbostic }
96555847Sbostic 
96655847Sbostic /*
96755847Sbostic  - repeat - generate code for a bounded repetition, recursively if needed
96860201Sbostic  == static void repeat(register struct parse *p, sopno start, int from, int to);
96955847Sbostic  */
97055847Sbostic static void
97155847Sbostic repeat(p, start, from, to)
97255847Sbostic register struct parse *p;
97355847Sbostic sopno start;			/* operand from here to end of strip */
97455847Sbostic int from;			/* repeated from this number */
97555847Sbostic int to;				/* to this number of times (maybe INFINITY) */
97655847Sbostic {
97755847Sbostic 	register sopno finish = HERE();
97855847Sbostic #	define	N	2
97955847Sbostic #	define	INF	3
98055847Sbostic #	define	REP(f, t)	((f)*8 + (t))
98155847Sbostic #	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
98255847Sbostic 	register sopno copy;
98355847Sbostic 
98455847Sbostic 	if (p->error != 0)	/* head off possible runaway recursion */
98555847Sbostic 		return;
98655847Sbostic 
98755847Sbostic 	assert(from <= to);
98855847Sbostic 
98955847Sbostic 	switch (REP(MAP(from), MAP(to))) {
99055847Sbostic 	case REP(0, 0):			/* must be user doing this */
99155847Sbostic 		DROP(finish-start);	/* drop the operand */
99255847Sbostic 		break;
99355847Sbostic 	case REP(0, 1):			/* as x{1,1}? */
99455847Sbostic 	case REP(0, N):			/* as x{1,n}? */
99555847Sbostic 	case REP(0, INF):		/* as x{1,}? */
99655847Sbostic 		INSERT(OQUEST_, start);		/* offset is wrong... */
99755847Sbostic 		repeat(p, start+1, 1, to);
99860201Sbostic 		AHEAD(start);			/* ... fix it */
99960201Sbostic 		ASTERN(O_QUEST, start);
100055847Sbostic 		break;
100155847Sbostic 	case REP(1, 1):			/* trivial case */
100255847Sbostic 		/* done */
100355847Sbostic 		break;
100455847Sbostic 	case REP(1, N):			/* as x?x{1,n-1} */
100555847Sbostic 		INSERT(OQUEST_, start);
100660201Sbostic 		ASTERN(O_QUEST, start);
100755847Sbostic 		copy = dupl(p, start+1, finish+1);
100855847Sbostic 		assert(copy == finish+2);
100955847Sbostic 		repeat(p, copy, 1, to-1);
101055847Sbostic 		break;
101155847Sbostic 	case REP(1, INF):		/* as x+ */
101255847Sbostic 		INSERT(OPLUS_, start);
101360201Sbostic 		ASTERN(O_PLUS, start);
101455847Sbostic 		break;
101555847Sbostic 	case REP(N, N):			/* as xx{m-1,n-1} */
101655847Sbostic 		copy = dupl(p, start, finish);
101755847Sbostic 		repeat(p, copy, from-1, to-1);
101855847Sbostic 		break;
101955847Sbostic 	case REP(N, INF):		/* as xx{n-1,INF} */
102055847Sbostic 		copy = dupl(p, start, finish);
102155847Sbostic 		repeat(p, copy, from-1, to);
102255847Sbostic 		break;
102355847Sbostic 	default:			/* "can't happen" */
102455847Sbostic 		SETERROR(REG_ASSERT);	/* just in case */
102555847Sbostic 		break;
102655847Sbostic 	}
102755847Sbostic }
102855847Sbostic 
102955847Sbostic /*
103055847Sbostic  - seterr - set an error condition
103160201Sbostic  == static int seterr(register struct parse *p, int e);
103255847Sbostic  */
103355847Sbostic static int			/* useless but makes type checking happy */
103455847Sbostic seterr(p, e)
103555847Sbostic register struct parse *p;
103655847Sbostic int e;
103755847Sbostic {
103855847Sbostic 	if (p->error == 0)	/* keep earliest error condition */
103955847Sbostic 		p->error = e;
104055847Sbostic 	p->next = nuls;		/* try to bring things to a halt */
104160201Sbostic 	p->end = nuls;
104255847Sbostic 	return(0);		/* make the return value well-defined */
104355847Sbostic }
104455847Sbostic 
104555847Sbostic /*
104655847Sbostic  - allocset - allocate a set of characters for []
104760201Sbostic  == static cset *allocset(register struct parse *p);
104855847Sbostic  */
104955847Sbostic static cset *
105055847Sbostic allocset(p)
105155847Sbostic register struct parse *p;
105255847Sbostic {
105355847Sbostic 	register int no = p->g->ncsets++;
105455847Sbostic 	register size_t nc;
105555847Sbostic 	register size_t nbytes;
105655847Sbostic 	register cset *cs;
105755847Sbostic 	register size_t css = (size_t)p->g->csetsize;
1058*66362Sbostic 	register int i;
105955847Sbostic 
106055847Sbostic 	if (no >= p->ncsalloc) {	/* need another column of space */
106155847Sbostic 		p->ncsalloc += CHAR_BIT;
106255847Sbostic 		nc = p->ncsalloc;
106355847Sbostic 		assert(nc % CHAR_BIT == 0);
106455847Sbostic 		nbytes = nc / CHAR_BIT * css;
106555847Sbostic 		if (p->g->sets == NULL)
106655847Sbostic 			p->g->sets = (cset *)malloc(nc * sizeof(cset));
106755847Sbostic 		else
106855847Sbostic 			p->g->sets = (cset *)realloc((char *)p->g->sets,
106955847Sbostic 							nc * sizeof(cset));
107055847Sbostic 		if (p->g->setbits == NULL)
1071*66362Sbostic 			p->g->setbits = (uch *)malloc(nbytes);
1072*66362Sbostic 		else {
1073*66362Sbostic 			p->g->setbits = (uch *)realloc((char *)p->g->setbits,
107455847Sbostic 								nbytes);
1075*66362Sbostic 			/* xxx this isn't right if setbits is now NULL */
1076*66362Sbostic 			for (i = 0; i < no; i++)
1077*66362Sbostic 				p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1078*66362Sbostic 		}
107955847Sbostic 		if (p->g->sets != NULL && p->g->setbits != NULL)
108055847Sbostic 			(void) memset((char *)p->g->setbits + (nbytes - css),
108155847Sbostic 								0, css);
108255847Sbostic 		else {
108355847Sbostic 			no = 0;
108455847Sbostic 			SETERROR(REG_ESPACE);
108555847Sbostic 			/* caller's responsibility not to do set ops */
108655847Sbostic 		}
108755847Sbostic 	}
108855847Sbostic 
108955847Sbostic 	assert(p->g->sets != NULL);	/* xxx */
109055847Sbostic 	cs = &p->g->sets[no];
109155847Sbostic 	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
109255847Sbostic 	cs->mask = 1 << ((no) % CHAR_BIT);
109355847Sbostic 	cs->hash = 0;
109455847Sbostic 	cs->smultis = 0;
109555847Sbostic 	cs->multis = NULL;
109655847Sbostic 
109755847Sbostic 	return(cs);
109855847Sbostic }
109955847Sbostic 
110055847Sbostic /*
110160201Sbostic  - freeset - free a now-unused set
110260201Sbostic  == static void freeset(register struct parse *p, register cset *cs);
110360201Sbostic  */
110460201Sbostic static void
110560201Sbostic freeset(p, cs)
110660201Sbostic register struct parse *p;
110760201Sbostic register cset *cs;
110860201Sbostic {
110960201Sbostic 	register int i;
111060201Sbostic 	register cset *top = &p->g->sets[p->g->ncsets];
111160201Sbostic 	register size_t css = (size_t)p->g->csetsize;
111260201Sbostic 
111360201Sbostic 	for (i = 0; i < css; i++)
111460201Sbostic 		CHsub(cs, i);
111560201Sbostic 	if (cs == top-1)	/* recover only the easy case */
111660201Sbostic 		p->g->ncsets--;
111760201Sbostic }
111860201Sbostic 
111960201Sbostic /*
112055847Sbostic  - freezeset - final processing on a set of characters
112160201Sbostic  == static int freezeset(register struct parse *p, register cset *cs);
112255847Sbostic  *
112355847Sbostic  * The main task here is merging identical sets.  This is usually a waste
112455847Sbostic  * of time (although the hash code minimizes the overhead), but can win
112555847Sbostic  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
112655847Sbostic  * is done using addition rather than xor -- all ASCII [aA] sets xor to
112755847Sbostic  * the same value!
112855847Sbostic  */
112955847Sbostic static int			/* set number */
113055847Sbostic freezeset(p, cs)
113155847Sbostic register struct parse *p;
113255847Sbostic register cset *cs;
113355847Sbostic {
1134*66362Sbostic 	register uch h = cs->hash;
113555847Sbostic 	register int i;
113655847Sbostic 	register cset *top = &p->g->sets[p->g->ncsets];
113755847Sbostic 	register cset *cs2;
113855847Sbostic 	register size_t css = (size_t)p->g->csetsize;
113955847Sbostic 
114055847Sbostic 	/* look for an earlier one which is the same */
114155847Sbostic 	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
114255847Sbostic 		if (cs2->hash == h && cs2 != cs) {
114355847Sbostic 			/* maybe */
114455847Sbostic 			for (i = 0; i < css; i++)
114555847Sbostic 				if (!!CHIN(cs2, i) != !!CHIN(cs, i))
114655847Sbostic 					break;		/* no */
114755847Sbostic 			if (i == css)
114855847Sbostic 				break;			/* yes */
114955847Sbostic 		}
115055847Sbostic 
115155847Sbostic 	if (cs2 < top) {	/* found one */
115260201Sbostic 		freeset(p, cs);
115355847Sbostic 		cs = cs2;
115455847Sbostic 	}
115555847Sbostic 
115655847Sbostic 	return((int)(cs - p->g->sets));
115755847Sbostic }
115855847Sbostic 
115955847Sbostic /*
116060201Sbostic  - firstch - return first character in a set (which must have at least one)
116160201Sbostic  == static int firstch(register struct parse *p, register cset *cs);
116260201Sbostic  */
116360201Sbostic static int			/* character; there is no "none" value */
116460201Sbostic firstch(p, cs)
116560201Sbostic register struct parse *p;
116660201Sbostic register cset *cs;
116760201Sbostic {
116860201Sbostic 	register int i;
116960201Sbostic 	register size_t css = (size_t)p->g->csetsize;
117060201Sbostic 
117160201Sbostic 	for (i = 0; i < css; i++)
117260201Sbostic 		if (CHIN(cs, i))
117360201Sbostic 			return((char)i);
117460201Sbostic 	assert(never);
117560201Sbostic 	return(0);		/* arbitrary */
117660201Sbostic }
117760201Sbostic 
117860201Sbostic /*
117960201Sbostic  - nch - number of characters in a set
118060201Sbostic  == static int nch(register struct parse *p, register cset *cs);
118160201Sbostic  */
118260201Sbostic static int
118360201Sbostic nch(p, cs)
118460201Sbostic register struct parse *p;
118560201Sbostic register cset *cs;
118660201Sbostic {
118760201Sbostic 	register int i;
118860201Sbostic 	register size_t css = (size_t)p->g->csetsize;
118960201Sbostic 	register int n = 0;
119060201Sbostic 
119160201Sbostic 	for (i = 0; i < css; i++)
119260201Sbostic 		if (CHIN(cs, i))
119360201Sbostic 			n++;
119460201Sbostic 	return(n);
119560201Sbostic }
119660201Sbostic 
119760201Sbostic /*
119855847Sbostic  - mcadd - add a collating element to a cset
119960201Sbostic  == static void mcadd(register struct parse *p, register cset *cs, \
120060201Sbostic  ==	register char *cp);
120155847Sbostic  */
120255847Sbostic static void
120355847Sbostic mcadd(p, cs, cp)
120455847Sbostic register struct parse *p;
120555847Sbostic register cset *cs;
120660201Sbostic register char *cp;
120755847Sbostic {
120855847Sbostic 	register size_t oldend = cs->smultis;
120955847Sbostic 
121060201Sbostic 	cs->smultis += strlen(cp) + 1;
121155847Sbostic 	if (cs->multis == NULL)
121260201Sbostic 		cs->multis = malloc(cs->smultis);
121355847Sbostic 	else
121460201Sbostic 		cs->multis = realloc(cs->multis, cs->smultis);
121555847Sbostic 	if (cs->multis == NULL) {
121655847Sbostic 		SETERROR(REG_ESPACE);
121755847Sbostic 		return;
121855847Sbostic 	}
121955847Sbostic 
122060201Sbostic 	(void) strcpy(cs->multis + oldend - 1, cp);
122155847Sbostic 	cs->multis[cs->smultis - 1] = '\0';
122255847Sbostic }
122355847Sbostic 
122455847Sbostic /*
122555847Sbostic  - mcsub - subtract a collating element from a cset
122660201Sbostic  == static void mcsub(register cset *cs, register char *cp);
122755847Sbostic  */
122855847Sbostic static void
122960201Sbostic mcsub(cs, cp)
123055847Sbostic register cset *cs;
123160201Sbostic register char *cp;
123255847Sbostic {
123360201Sbostic 	register char *fp = mcfind(cs, cp);
123460201Sbostic 	register size_t len = strlen(fp);
123555847Sbostic 
123660201Sbostic 	assert(fp != NULL);
123760201Sbostic 	(void) memmove(fp, fp + len + 1,
123855847Sbostic 				cs->smultis - (fp + len + 1 - cs->multis));
123955847Sbostic 	cs->smultis -= len;
124055847Sbostic 
124155847Sbostic 	if (cs->smultis == 0) {
124260201Sbostic 		free(cs->multis);
124355847Sbostic 		cs->multis = NULL;
124455847Sbostic 		return;
124555847Sbostic 	}
124655847Sbostic 
124760201Sbostic 	cs->multis = realloc(cs->multis, cs->smultis);
124855847Sbostic 	assert(cs->multis != NULL);
124955847Sbostic }
125055847Sbostic 
125155847Sbostic /*
125255847Sbostic  - mcin - is a collating element in a cset?
125360201Sbostic  == static int mcin(register cset *cs, register char *cp);
125455847Sbostic  */
125555847Sbostic static int
125660201Sbostic mcin(cs, cp)
125755847Sbostic register cset *cs;
125860201Sbostic register char *cp;
125955847Sbostic {
126055847Sbostic 	return(mcfind(cs, cp) != NULL);
126155847Sbostic }
126255847Sbostic 
126355847Sbostic /*
126455847Sbostic  - mcfind - find a collating element in a cset
126560201Sbostic  == static char *mcfind(register cset *cs, register char *cp);
126655847Sbostic  */
126760201Sbostic static char *
126855847Sbostic mcfind(cs, cp)
126955847Sbostic register cset *cs;
127060201Sbostic register char *cp;
127155847Sbostic {
127260201Sbostic 	register char *p;
127355847Sbostic 
127455847Sbostic 	if (cs->multis == NULL)
127555847Sbostic 		return(NULL);
127660201Sbostic 	for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
127760201Sbostic 		if (strcmp(cp, p) == 0)
127855847Sbostic 			return(p);
127955847Sbostic 	return(NULL);
128055847Sbostic }
128155847Sbostic 
128255847Sbostic /*
128355847Sbostic  - mcinvert - invert the list of collating elements in a cset
1284*66362Sbostic  == static void mcinvert(register struct parse *p, register cset *cs);
128555847Sbostic  *
128655847Sbostic  * This would have to know the set of possibilities.  Implementation
128755847Sbostic  * is deferred.
128855847Sbostic  */
128955847Sbostic static void
1290*66362Sbostic mcinvert(p, cs)
1291*66362Sbostic register struct parse *p;
129255847Sbostic register cset *cs;
129355847Sbostic {
129455847Sbostic 	assert(cs->multis == NULL);	/* xxx */
129555847Sbostic }
129655847Sbostic 
129755847Sbostic /*
129860201Sbostic  - mccase - add case counterparts of the list of collating elements in a cset
1299*66362Sbostic  == static void mccase(register struct parse *p, register cset *cs);
130060201Sbostic  *
130160201Sbostic  * This would have to know the set of possibilities.  Implementation
130260201Sbostic  * is deferred.
130360201Sbostic  */
130460201Sbostic static void
1305*66362Sbostic mccase(p, cs)
1306*66362Sbostic register struct parse *p;
130760201Sbostic register cset *cs;
130860201Sbostic {
130960201Sbostic 	assert(cs->multis == NULL);	/* xxx */
131060201Sbostic }
131160201Sbostic 
131260201Sbostic /*
131355847Sbostic  - isinsets - is this character in any sets?
131460201Sbostic  == static int isinsets(register struct re_guts *g, int c);
131555847Sbostic  */
131655847Sbostic static int			/* predicate */
131755847Sbostic isinsets(g, c)
131855847Sbostic register struct re_guts *g;
131960201Sbostic int c;
132055847Sbostic {
1321*66362Sbostic 	register uch *col;
132255847Sbostic 	register int i;
132355847Sbostic 	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
132460201Sbostic 	register unsigned uc = (unsigned char)c;
132555847Sbostic 
132655847Sbostic 	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
132760201Sbostic 		if (col[uc] != 0)
132855847Sbostic 			return(1);
132955847Sbostic 	return(0);
133055847Sbostic }
133155847Sbostic 
133255847Sbostic /*
133355847Sbostic  - samesets - are these two characters in exactly the same sets?
133460201Sbostic  == static int samesets(register struct re_guts *g, int c1, int c2);
133555847Sbostic  */
133655847Sbostic static int			/* predicate */
133755847Sbostic samesets(g, c1, c2)
133855847Sbostic register struct re_guts *g;
133960201Sbostic int c1;
134060201Sbostic int c2;
134155847Sbostic {
1342*66362Sbostic 	register uch *col;
134355847Sbostic 	register int i;
134455847Sbostic 	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
134560201Sbostic 	register unsigned uc1 = (unsigned char)c1;
134660201Sbostic 	register unsigned uc2 = (unsigned char)c2;
134755847Sbostic 
134855847Sbostic 	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
134960201Sbostic 		if (col[uc1] != col[uc2])
135055847Sbostic 			return(0);
135155847Sbostic 	return(1);
135255847Sbostic }
135355847Sbostic 
135455847Sbostic /*
135555847Sbostic  - categorize - sort out character categories
135660201Sbostic  == static void categorize(struct parse *p, register struct re_guts *g);
135755847Sbostic  */
135855847Sbostic static void
135955847Sbostic categorize(p, g)
136055847Sbostic struct parse *p;
136155847Sbostic register struct re_guts *g;
136255847Sbostic {
136360201Sbostic 	register cat_t *cats = g->categories;
136460201Sbostic 	register int c;
136560201Sbostic 	register int c2;
136660201Sbostic 	register cat_t cat;
136755847Sbostic 
136855847Sbostic 	/* avoid making error situations worse */
136955847Sbostic 	if (p->error != 0)
137055847Sbostic 		return;
137155847Sbostic 
137260201Sbostic 	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
137355847Sbostic 		if (cats[c] == 0 && isinsets(g, c)) {
137455847Sbostic 			cat = g->ncategories++;
137555847Sbostic 			cats[c] = cat;
137660201Sbostic 			for (c2 = c+1; c2 <= CHAR_MAX; c2++)
137755847Sbostic 				if (cats[c2] == 0 && samesets(g, c, c2))
137855847Sbostic 					cats[c2] = cat;
137955847Sbostic 		}
138055847Sbostic }
138155847Sbostic 
138255847Sbostic /*
138355847Sbostic  - dupl - emit a duplicate of a bunch of sops
138460201Sbostic  == static sopno dupl(register struct parse *p, sopno start, sopno finish);
138555847Sbostic  */
138655847Sbostic static sopno			/* start of duplicate */
138755847Sbostic dupl(p, start, finish)
138855847Sbostic register struct parse *p;
138955847Sbostic sopno start;			/* from here */
139055847Sbostic sopno finish;			/* to this less one */
139155847Sbostic {
139255847Sbostic 	register sopno ret = HERE();
139355847Sbostic 	register sopno len = finish - start;
139455847Sbostic 
139555847Sbostic 	assert(finish >= start);
139655847Sbostic 	if (len == 0)
139755847Sbostic 		return(ret);
139855847Sbostic 	enlarge(p, p->ssize + len);	/* this many unexpected additions */
139955847Sbostic 	assert(p->ssize >= p->slen + len);
140055847Sbostic 	(void) memcpy((char *)(p->strip + p->slen),
140155847Sbostic 		(char *)(p->strip + start), (size_t)len*sizeof(sop));
140255847Sbostic 	p->slen += len;
140355847Sbostic 	return(ret);
140455847Sbostic }
140555847Sbostic 
140655847Sbostic /*
140755847Sbostic  - doemit - emit a strip operator
140860201Sbostic  == static void doemit(register struct parse *p, sop op, size_t opnd);
140955847Sbostic  *
141055847Sbostic  * It might seem better to implement this as a macro with a function as
141155847Sbostic  * hard-case backup, but it's just too big and messy unless there are
141255847Sbostic  * some changes to the data structures.  Maybe later.
141355847Sbostic  */
141455847Sbostic static void
141555847Sbostic doemit(p, op, opnd)
141655847Sbostic register struct parse *p;
141755847Sbostic sop op;
141855847Sbostic size_t opnd;
141955847Sbostic {
142055847Sbostic 	/* avoid making error situations worse */
142155847Sbostic 	if (p->error != 0)
142255847Sbostic 		return;
142355847Sbostic 
142455847Sbostic 	/* deal with oversize operands ("can't happen", more or less) */
142555847Sbostic 	assert(opnd < 1<<OPSHIFT);
142655847Sbostic 
142755847Sbostic 	/* deal with undersized strip */
142855847Sbostic 	if (p->slen >= p->ssize)
142955847Sbostic 		enlarge(p, (p->ssize+1) / 2 * 3);	/* +50% */
143055847Sbostic 	assert(p->slen < p->ssize);
143155847Sbostic 
143255847Sbostic 	/* finally, it's all reduced to the easy case */
143355847Sbostic 	p->strip[p->slen++] = SOP(op, opnd);
143455847Sbostic }
143555847Sbostic 
143655847Sbostic /*
143755847Sbostic  - doinsert - insert a sop into the strip
143860201Sbostic  == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
143955847Sbostic  */
144055847Sbostic static void
144155847Sbostic doinsert(p, op, opnd, pos)
144255847Sbostic register struct parse *p;
144355847Sbostic sop op;
144455847Sbostic size_t opnd;
144555847Sbostic sopno pos;
144655847Sbostic {
144755847Sbostic 	register sopno sn;
144855847Sbostic 	register sop s;
144955847Sbostic 	register int i;
145055847Sbostic 
145155847Sbostic 	/* avoid making error situations worse */
145255847Sbostic 	if (p->error != 0)
145355847Sbostic 		return;
145455847Sbostic 
145555847Sbostic 	sn = HERE();
145655847Sbostic 	EMIT(op, opnd);		/* do checks, ensure space */
145755847Sbostic 	assert(HERE() == sn+1);
145855847Sbostic 	s = p->strip[sn];
145955847Sbostic 
146055847Sbostic 	/* adjust paren pointers */
146155847Sbostic 	assert(pos > 0);
146255847Sbostic 	for (i = 1; i < NPAREN; i++) {
146355847Sbostic 		if (p->pbegin[i] >= pos) {
146455847Sbostic 			p->pbegin[i]++;
146555847Sbostic 		}
146655847Sbostic 		if (p->pend[i] >= pos) {
146755847Sbostic 			p->pend[i]++;
146855847Sbostic 		}
146955847Sbostic 	}
147055847Sbostic 
147155847Sbostic 	memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
147255847Sbostic 						(HERE()-pos-1)*sizeof(sop));
147355847Sbostic 	p->strip[pos] = s;
147455847Sbostic }
147555847Sbostic 
147655847Sbostic /*
147755847Sbostic  - dofwd - complete a forward reference
147860201Sbostic  == static void dofwd(register struct parse *p, sopno pos, sop value);
147955847Sbostic  */
148055847Sbostic static void
148155847Sbostic dofwd(p, pos, value)
148255847Sbostic register struct parse *p;
148355847Sbostic register sopno pos;
148455847Sbostic sop value;
148555847Sbostic {
148655847Sbostic 	/* avoid making error situations worse */
148755847Sbostic 	if (p->error != 0)
148855847Sbostic 		return;
148955847Sbostic 
149055847Sbostic 	assert(value < 1<<OPSHIFT);
149155847Sbostic 	p->strip[pos] = OP(p->strip[pos]) | value;
149255847Sbostic }
149355847Sbostic 
149455847Sbostic /*
149555847Sbostic  - enlarge - enlarge the strip
149660201Sbostic  == static void enlarge(register struct parse *p, sopno size);
149755847Sbostic  */
149855847Sbostic static void
149955847Sbostic enlarge(p, size)
150055847Sbostic register struct parse *p;
150155847Sbostic register sopno size;
150255847Sbostic {
150355847Sbostic 	register sop *sp;
150455847Sbostic 
150555847Sbostic 	if (p->ssize >= size)
150655847Sbostic 		return;
150755847Sbostic 
150855847Sbostic 	sp = (sop *)realloc(p->strip, size*sizeof(sop));
150955847Sbostic 	if (sp == NULL) {
151055847Sbostic 		SETERROR(REG_ESPACE);
151155847Sbostic 		return;
151255847Sbostic 	}
151355847Sbostic 	p->strip = sp;
151455847Sbostic 	p->ssize = size;
151555847Sbostic }
151655847Sbostic 
151755847Sbostic /*
151855847Sbostic  - stripsnug - compact the strip
151960201Sbostic  == static void stripsnug(register struct parse *p, register struct re_guts *g);
152055847Sbostic  */
152155847Sbostic static void
152255847Sbostic stripsnug(p, g)
152355847Sbostic register struct parse *p;
152455847Sbostic register struct re_guts *g;
152555847Sbostic {
152655847Sbostic 	g->nstates = p->slen;
1527*66362Sbostic 	g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
152855847Sbostic 	if (g->strip == NULL) {
152955847Sbostic 		SETERROR(REG_ESPACE);
153055847Sbostic 		g->strip = p->strip;
153155847Sbostic 	}
153255847Sbostic }
153355847Sbostic 
153455847Sbostic /*
153555847Sbostic  - findmust - fill in must and mlen with longest mandatory literal string
153660201Sbostic  == static void findmust(register struct parse *p, register struct re_guts *g);
153755847Sbostic  *
153855847Sbostic  * This algorithm could do fancy things like analyzing the operands of |
153955847Sbostic  * for common subsequences.  Someday.  This code is simple and finds most
154055847Sbostic  * of the interesting cases.
154155847Sbostic  *
154255847Sbostic  * Note that must and mlen got initialized during setup.
154355847Sbostic  */
154456355Sbostic static void
154555847Sbostic findmust(p, g)
154655847Sbostic struct parse *p;
154755847Sbostic register struct re_guts *g;
154855847Sbostic {
154955847Sbostic 	register sop *scan;
155055847Sbostic 	sop *start;
155155847Sbostic 	register sop *newstart;
155255847Sbostic 	register sopno newlen;
155355847Sbostic 	register sop s;
155455847Sbostic 	register char *cp;
155555847Sbostic 	register sopno i;
155655847Sbostic 
155755847Sbostic 	/* avoid making error situations worse */
155855847Sbostic 	if (p->error != 0)
155955847Sbostic 		return;
156055847Sbostic 
156155847Sbostic 	/* find the longest OCHAR sequence in strip */
156255847Sbostic 	newlen = 0;
156355847Sbostic 	scan = g->strip + 1;
156455847Sbostic 	do {
156555847Sbostic 		s = *scan++;
156655847Sbostic 		switch (OP(s)) {
156755847Sbostic 		case OCHAR:		/* sequence member */
156855847Sbostic 			if (newlen == 0)		/* new sequence */
156955847Sbostic 				newstart = scan - 1;
157055847Sbostic 			newlen++;
157155847Sbostic 			break;
157255847Sbostic 		case OPLUS_:		/* things that don't break one */
157355847Sbostic 		case OLPAREN:
157455847Sbostic 		case ORPAREN:
157555847Sbostic 			break;
157655847Sbostic 		case OQUEST_:		/* things that must be skipped */
157755847Sbostic 		case OCH_:
157855847Sbostic 			scan--;
157955847Sbostic 			do {
158055847Sbostic 				scan += OPND(s);
158155847Sbostic 				s = *scan;
158255847Sbostic 				/* assert() interferes w debug printouts */
158355847Sbostic 				if (OP(s) != O_QUEST && OP(s) != O_CH &&
158455847Sbostic 							OP(s) != OOR2) {
158555847Sbostic 					g->iflags |= BAD;
158655847Sbostic 					return;
158755847Sbostic 				}
158855847Sbostic 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
158955847Sbostic 			/* fallthrough */
159055847Sbostic 		default:		/* things that break a sequence */
159155847Sbostic 			if (newlen > g->mlen) {		/* ends one */
159255847Sbostic 				start = newstart;
159355847Sbostic 				g->mlen = newlen;
159455847Sbostic 			}
159555847Sbostic 			newlen = 0;
159655847Sbostic 			break;
159755847Sbostic 		}
159855847Sbostic 	} while (OP(s) != OEND);
159955847Sbostic 
160055847Sbostic 	if (g->mlen == 0)		/* there isn't one */
160155847Sbostic 		return;
160255847Sbostic 
160355847Sbostic 	/* turn it into a character string */
160455847Sbostic 	g->must = malloc((size_t)g->mlen + 1);
160555847Sbostic 	if (g->must == NULL) {		/* argh; just forget it */
160655847Sbostic 		g->mlen = 0;
160755847Sbostic 		return;
160855847Sbostic 	}
160955847Sbostic 	cp = g->must;
161055847Sbostic 	scan = start;
161155847Sbostic 	for (i = g->mlen; i > 0; i--) {
161255847Sbostic 		while (OP(s = *scan++) != OCHAR)
161355847Sbostic 			continue;
1614*66362Sbostic 		assert(cp < g->must + g->mlen);
161560201Sbostic 		*cp++ = (char)OPND(s);
161655847Sbostic 	}
1617*66362Sbostic 	assert(cp == g->must + g->mlen);
161855847Sbostic 	*cp++ = '\0';		/* just on general principles */
161955847Sbostic }
162055847Sbostic 
162155847Sbostic /*
162255847Sbostic  - pluscount - count + nesting
162360201Sbostic  == static sopno pluscount(register struct parse *p, register struct re_guts *g);
162455847Sbostic  */
162556355Sbostic static sopno			/* nesting depth */
162655847Sbostic pluscount(p, g)
162755847Sbostic struct parse *p;
162855847Sbostic register struct re_guts *g;
162955847Sbostic {
163055847Sbostic 	register sop *scan;
163155847Sbostic 	register sop s;
163255847Sbostic 	register sopno plusnest = 0;
163355847Sbostic 	register sopno maxnest = 0;
163455847Sbostic 
163555847Sbostic 	if (p->error != 0)
163655847Sbostic 		return(0);	/* there may not be an OEND */
163755847Sbostic 
163855847Sbostic 	scan = g->strip + 1;
163955847Sbostic 	do {
164055847Sbostic 		s = *scan++;
164155847Sbostic 		switch (OP(s)) {
164255847Sbostic 		case OPLUS_:
164355847Sbostic 			plusnest++;
164455847Sbostic 			break;
164555847Sbostic 		case O_PLUS:
164655847Sbostic 			if (plusnest > maxnest)
164755847Sbostic 				maxnest = plusnest;
164855847Sbostic 			plusnest--;
164955847Sbostic 			break;
165055847Sbostic 		}
165155847Sbostic 	} while (OP(s) != OEND);
165255847Sbostic 	if (plusnest != 0)
165355847Sbostic 		g->iflags |= BAD;
165455847Sbostic 	return(maxnest);
165555847Sbostic }
1656