xref: /csrg-svn/lib/libc/regex/regcomp.c (revision 55847)
1*55847Sbostic /*-
2*55847Sbostic  * Copyright (c) 1992 Henry Spencer.
3*55847Sbostic  * Copyright (c) 1992 The Regents of the University of California.
4*55847Sbostic  * All rights reserved.
5*55847Sbostic  *
6*55847Sbostic  * This code is derived from software contributed to Berkeley by
7*55847Sbostic  * Henry Spencer of the University of Toronto.
8*55847Sbostic  *
9*55847Sbostic  * %sccs.include.redist.c%
10*55847Sbostic  *
11*55847Sbostic  *	@(#)regcomp.c	5.1 (Berkeley) 08/06/92
12*55847Sbostic  */
13*55847Sbostic 
14*55847Sbostic #if defined(LIBC_SCCS) && !defined(lint)
15*55847Sbostic static char sccsid[] = "@(#)regcomp.c	5.1 (Berkeley) 08/06/92";
16*55847Sbostic #endif /* LIBC_SCCS and not lint */
17*55847Sbostic 
18*55847Sbostic #include <sys/types.h>
19*55847Sbostic 
20*55847Sbostic #include <stdio.h>
21*55847Sbostic #include <string.h>
22*55847Sbostic #include <ctype.h>
23*55847Sbostic #include <limits.h>
24*55847Sbostic #include <stdlib.h>
25*55847Sbostic #include <assert.h>
26*55847Sbostic #include <regex.h>
27*55847Sbostic 
28*55847Sbostic #include "utils.h"
29*55847Sbostic #include "regex2.h"
30*55847Sbostic 
31*55847Sbostic #include "cclass.h"
32*55847Sbostic #include "cname.h"
33*55847Sbostic 
34*55847Sbostic static uchar nuls[10];		/* place to point scanner in event of error */
35*55847Sbostic 
36*55847Sbostic /*
37*55847Sbostic  * parse structure, passed up and down to avoid global variables and
38*55847Sbostic  * other clumsinesses
39*55847Sbostic  */
40*55847Sbostic struct parse {
41*55847Sbostic 	uchar *next;		/* next character in RE */
42*55847Sbostic 	int error;		/* has an error been seen? */
43*55847Sbostic 	sop *strip;		/* malloced strip */
44*55847Sbostic 	sopno ssize;		/* malloced strip size (allocated) */
45*55847Sbostic 	sopno slen;		/* malloced strip length (used) */
46*55847Sbostic 	int ncsalloc;		/* number of csets allocated */
47*55847Sbostic 	struct re_guts *g;
48*55847Sbostic #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
49*55847Sbostic 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
50*55847Sbostic 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
51*55847Sbostic };
52*55847Sbostic 
53*55847Sbostic STATIC void	 doemit __P((struct parse *, sop, size_t));
54*55847Sbostic STATIC void	 dofwd __P((struct parse *, sopno, sop));
55*55847Sbostic STATIC void	 doinsert __P((struct parse *, sop, size_t, sopno));
56*55847Sbostic STATIC sopno	 dupl __P((struct parse *, sopno, sopno));
57*55847Sbostic STATIC void	 enlarge __P((struct parse *, sopno));
58*55847Sbostic STATIC void	 mcadd __P((struct parse *, cset *, uchar *));
59*55847Sbostic STATIC uchar	*mcfind __P((cset *, uchar *));
60*55847Sbostic STATIC int	 mcin __P((struct parse *, cset *, uchar *));
61*55847Sbostic STATIC void	 mcinvert __P((struct parse *, cset *));
62*55847Sbostic STATIC void	 mcsub __P((struct parse *, cset *, uchar *));
63*55847Sbostic STATIC int	 seterr __P((struct parse *, int));
64*55847Sbostic 
65*55847Sbostic /*
66*55847Sbostic  * macros for use with parse structure
67*55847Sbostic  * BEWARE:  these know that the parse structure is named `p' !!!
68*55847Sbostic  */
69*55847Sbostic #define	PEEK()	((uchar)*p->next)
70*55847Sbostic #define	PEEK2()	((uchar)*(p->next+1))
71*55847Sbostic #define	SEE(c)	(PEEK() == (c))
72*55847Sbostic #define	SEETWO(a, b)	(PEEK() == (a) && PEEK2() == (b))
73*55847Sbostic #define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
74*55847Sbostic #define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
75*55847Sbostic #define	NEXT()	(p->next++)
76*55847Sbostic #define	NEXT2()	(p->next += 2)
77*55847Sbostic #define	NEXTn(n)	(p->next += (n))
78*55847Sbostic #define	GETNEXT()	((uchar)*p->next++)
79*55847Sbostic #define	SETERROR(e)	seterr(p, (e))
80*55847Sbostic #define	REQUIRE(co, e)	((co) || SETERROR(e))
81*55847Sbostic #define	MUSTSEE(c, e)	(REQUIRE(PEEK() == (c), e))
82*55847Sbostic #define	MUSTEAT(c, e)	(REQUIRE(GETNEXT() == (c), e))
83*55847Sbostic #define	MUSTNOTSEE(c, e)	(REQUIRE(PEEK() != (c), e))
84*55847Sbostic #define	EMIT(sop, sopnd)	doemit(p, sop, (size_t)(sopnd))
85*55847Sbostic #define	INSERT(sop, pos)	doinsert(p, sop, HERE()-(pos)+1, pos)
86*55847Sbostic #define	FWD(pos)		dofwd(p, pos, HERE()-(pos))
87*55847Sbostic #define	BACK(sop, pos)		EMIT(sop, HERE()-pos)
88*55847Sbostic #define	HERE()		(p->slen)
89*55847Sbostic #define	THERE()		(p->slen - 1)
90*55847Sbostic #define	DROP(n)	(p->slen -= (n))
91*55847Sbostic 
92*55847Sbostic /*
93*55847Sbostic  - regcomp - interface for parser and compilation
94*55847Sbostic  */
95*55847Sbostic int				/* 0 success, otherwise REG_something */
96*55847Sbostic regcomp(preg, pattern, cflags)
97*55847Sbostic regex_t *preg;
98*55847Sbostic const char *pattern;
99*55847Sbostic int cflags;
100*55847Sbostic {
101*55847Sbostic 	struct parse pa;
102*55847Sbostic 	register struct re_guts *g;
103*55847Sbostic 	register struct parse *p = &pa;
104*55847Sbostic 	sopno nstates;
105*55847Sbostic 	register int i;
106*55847Sbostic 	STATIC void p_ere();
107*55847Sbostic 	STATIC void p_bre();
108*55847Sbostic 	STATIC void stripsnug();
109*55847Sbostic 	STATIC void categorize();
110*55847Sbostic 	STATIC void findmust();
111*55847Sbostic 	STATIC sopno pluscount();
112*55847Sbostic 
113*55847Sbostic 	/* do the mallocs early so failure handling is easy */
114*55847Sbostic 	/* the +NUC here is for the category table */
115*55847Sbostic 	g = (struct re_guts *)malloc(sizeof(struct re_guts) + NUC);
116*55847Sbostic 	if (g == NULL)
117*55847Sbostic 		return(REG_ESPACE);
118*55847Sbostic 	p->ssize = strlen(pattern)/2*3 + 1;
119*55847Sbostic 	p->strip = (sop *)malloc(p->ssize * sizeof(sop));
120*55847Sbostic 	p->slen = 0;
121*55847Sbostic 	if (p->strip == NULL) {
122*55847Sbostic 		free((char *)g);
123*55847Sbostic 		return(REG_ESPACE);
124*55847Sbostic 	}
125*55847Sbostic 
126*55847Sbostic 	/* set things up */
127*55847Sbostic 	p->g = g;
128*55847Sbostic 	p->next = (uchar *)pattern;
129*55847Sbostic 	p->error = 0;
130*55847Sbostic 	p->ncsalloc = 0;
131*55847Sbostic 	for (i = 0; i < NPAREN; i++) {
132*55847Sbostic 		p->pbegin[i] = 0;
133*55847Sbostic 		p->pend[i] = 0;
134*55847Sbostic 	}
135*55847Sbostic 	g->csetsize = NUC;
136*55847Sbostic 	g->sets = NULL;
137*55847Sbostic 	g->setbits = NULL;
138*55847Sbostic 	g->ncsets = 0;
139*55847Sbostic 	g->cflags = cflags;
140*55847Sbostic 	g->iflags = 0;
141*55847Sbostic 	g->must = NULL;
142*55847Sbostic 	g->mlen = 0;
143*55847Sbostic 	g->nsub = 0;
144*55847Sbostic 	g->ncategories = 1;	/* category 0 is "everything else" */
145*55847Sbostic 	g->categories = (uchar *)g + sizeof(struct re_guts);
146*55847Sbostic 	(void) memset((char *)g->categories, 0, NUC);
147*55847Sbostic 	g->backrefs = 0;
148*55847Sbostic 	g->nplus = 0;
149*55847Sbostic 
150*55847Sbostic 	/* do it */
151*55847Sbostic 	EMIT(OEND, 0);
152*55847Sbostic 	g->firststate = THERE();
153*55847Sbostic 	if (cflags&REG_EXTENDED)
154*55847Sbostic 		p_ere(p, '\0');
155*55847Sbostic 	else
156*55847Sbostic 		p_bre(p, '\0', '\0');
157*55847Sbostic 	EMIT(OEND, 0);
158*55847Sbostic 	g->laststate = THERE();
159*55847Sbostic 
160*55847Sbostic 	/* tidy up loose ends and fill things in */
161*55847Sbostic 	categorize(p, g);
162*55847Sbostic 	stripsnug(p, g);
163*55847Sbostic 	findmust(p, g);
164*55847Sbostic 	g->nplus = pluscount(p, g);
165*55847Sbostic 	g->magic = MAGIC2;
166*55847Sbostic 	preg->re_nsub = g->nsub;
167*55847Sbostic 	preg->re_g = g;
168*55847Sbostic 	preg->re_magic = MAGIC1;
169*55847Sbostic #ifdef NDEBUG
170*55847Sbostic 	/* not debugging, so can't rely on the assert() in regexec() */
171*55847Sbostic 	if (g->iflags&BAD)
172*55847Sbostic 		SETERROR(REG_ASSERT);
173*55847Sbostic #endif
174*55847Sbostic 
175*55847Sbostic 	/* win or lose, we're done */
176*55847Sbostic 	if (p->error != 0)	/* lose */
177*55847Sbostic 		regfree(preg);
178*55847Sbostic 	return(p->error);
179*55847Sbostic }
180*55847Sbostic 
181*55847Sbostic /*
182*55847Sbostic  - p_ere - ERE parser top level, concatenation and alternation
183*55847Sbostic  */
184*55847Sbostic static void
185*55847Sbostic p_ere(p, stop)
186*55847Sbostic register struct parse *p;
187*55847Sbostic uchar stop;			/* character this ERE should end at */
188*55847Sbostic {
189*55847Sbostic 	STATIC void p_ere_exp();
190*55847Sbostic 	register uchar c;
191*55847Sbostic 	register sopno prevback;
192*55847Sbostic 	register sopno prevfwd;
193*55847Sbostic 	register sopno conc;
194*55847Sbostic 	register int first = 1;		/* is this the first alternative? */
195*55847Sbostic 
196*55847Sbostic 	for (;;) {
197*55847Sbostic 		/* do a bunch of concatenated expressions */
198*55847Sbostic 		conc = HERE();
199*55847Sbostic 		while ((c = PEEK()) != '|' && c != stop && c != '\0')
200*55847Sbostic 			p_ere_exp(p);
201*55847Sbostic 		REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
202*55847Sbostic 
203*55847Sbostic 		if (!EAT('|'))
204*55847Sbostic 			break;		/* NOTE BREAK OUT */
205*55847Sbostic 
206*55847Sbostic 		if (first) {
207*55847Sbostic 			INSERT(OCH_, conc);	/* offset is wrong */
208*55847Sbostic 			prevfwd = conc;
209*55847Sbostic 			prevback = conc;
210*55847Sbostic 			first = 0;
211*55847Sbostic 		}
212*55847Sbostic 		BACK(OOR1, prevback);
213*55847Sbostic 		prevback = THERE();
214*55847Sbostic 		FWD(prevfwd);			/* fix previous offset */
215*55847Sbostic 		prevfwd = HERE();
216*55847Sbostic 		EMIT(OOR2, 0);			/* offset is very wrong */
217*55847Sbostic 	}
218*55847Sbostic 
219*55847Sbostic 	if (!first) {		/* tail-end fixups */
220*55847Sbostic 		FWD(prevfwd);
221*55847Sbostic 		BACK(O_CH, prevback);
222*55847Sbostic 	}
223*55847Sbostic 
224*55847Sbostic 	assert(SEE(stop) || SEE('\0'));
225*55847Sbostic }
226*55847Sbostic 
227*55847Sbostic /*
228*55847Sbostic  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
229*55847Sbostic  */
230*55847Sbostic static void
231*55847Sbostic p_ere_exp(p)
232*55847Sbostic register struct parse *p;
233*55847Sbostic {
234*55847Sbostic 	STATIC int p_count();
235*55847Sbostic 	STATIC void p_bracket();
236*55847Sbostic 	STATIC void ordinary();
237*55847Sbostic 	STATIC void nonnewline();
238*55847Sbostic 	STATIC void repeat();
239*55847Sbostic 	register uchar c;
240*55847Sbostic 	register sopno pos;
241*55847Sbostic 	register sopno sub;
242*55847Sbostic 	register int count;
243*55847Sbostic 	register int count2;
244*55847Sbostic 	register sopno subno;
245*55847Sbostic 	int wascaret = 0;
246*55847Sbostic 	/* we call { a repetition if followed by a digit */
247*55847Sbostic #	define	ISRPT(c1, c2)	(c1 == '*' || c1 == '+' || c1 == '?' || \
248*55847Sbostic 						(c1 == '{' && isdigit(c2)))
249*55847Sbostic 
250*55847Sbostic 	c = GETNEXT();
251*55847Sbostic 	assert(c != '\0');	/* caller should have ensured this */
252*55847Sbostic 
253*55847Sbostic 	pos = HERE();
254*55847Sbostic 	switch (c) {
255*55847Sbostic 	case '(':
256*55847Sbostic 		MUSTNOTSEE('\0', REG_EPAREN);
257*55847Sbostic 		p->g->nsub++;
258*55847Sbostic 		subno = p->g->nsub;
259*55847Sbostic 		if (subno < NPAREN)
260*55847Sbostic 			p->pbegin[subno] = HERE();
261*55847Sbostic 		EMIT(OLPAREN, subno);
262*55847Sbostic 		if (!SEE(')'))
263*55847Sbostic 			p_ere(p, ')');
264*55847Sbostic 		if (subno < NPAREN) {
265*55847Sbostic 			p->pend[subno] = HERE();
266*55847Sbostic 			assert(p->pend[subno] != 0);
267*55847Sbostic 		}
268*55847Sbostic 		EMIT(ORPAREN, subno);
269*55847Sbostic 		MUSTEAT(')', REG_EPAREN);
270*55847Sbostic 		break;
271*55847Sbostic #ifndef POSIX_MISTAKE
272*55847Sbostic 	case ')':		/* happens only if no current unmatched ( */
273*55847Sbostic 		/*
274*55847Sbostic 		 * You may ask, why the ifndef?  Because I didn't notice
275*55847Sbostic 		 * this until slightly too late for 1003.2, and none of the
276*55847Sbostic 		 * other 1003.2 regular-expression reviewers noticed it at
277*55847Sbostic 		 * all.  So an unmatched ) is legal POSIX, at least until
278*55847Sbostic 		 * we can get it fixed.
279*55847Sbostic 		 */
280*55847Sbostic 		SETERROR(REG_EPAREN);
281*55847Sbostic 		break;
282*55847Sbostic #endif
283*55847Sbostic 	case '^':
284*55847Sbostic 		EMIT(OBOL, 0);
285*55847Sbostic 		p->g->iflags |= USEBOL;
286*55847Sbostic 		wascaret = 1;
287*55847Sbostic 		break;
288*55847Sbostic 	case '$':
289*55847Sbostic 		EMIT(OEOL, 0);
290*55847Sbostic 		p->g->iflags |= USEEOL;
291*55847Sbostic 		break;
292*55847Sbostic 	case '|':
293*55847Sbostic 		SETERROR(REG_EMPTY);
294*55847Sbostic 		break;
295*55847Sbostic 	case '*':
296*55847Sbostic 	case '+':
297*55847Sbostic 	case '?':
298*55847Sbostic 		SETERROR(REG_BADRPT);
299*55847Sbostic 		break;
300*55847Sbostic 	case '.':
301*55847Sbostic 		if (p->g->cflags&REG_NEWLINE)
302*55847Sbostic 			nonnewline(p);
303*55847Sbostic 		else
304*55847Sbostic 			EMIT(OANY, 0);
305*55847Sbostic 		break;
306*55847Sbostic 	case '[':
307*55847Sbostic 		p_bracket(p);
308*55847Sbostic 		break;
309*55847Sbostic 	case '\\':
310*55847Sbostic 		c = GETNEXT();
311*55847Sbostic 		if (c == '^' || c == '.' || c == '[' || c == '$' ||
312*55847Sbostic 				c == '(' || c == ')' || c == '|' ||
313*55847Sbostic 				c == '*' || c == '+' || c == '?' ||
314*55847Sbostic 				c == '{' || c == '\\')
315*55847Sbostic 			ordinary(p, c);
316*55847Sbostic 		else
317*55847Sbostic 			SETERROR(REG_EESCAPE);
318*55847Sbostic 		break;
319*55847Sbostic 	case '{':		/* okay as ordinary except if digit follows */
320*55847Sbostic 		REQUIRE(!isdigit(PEEK()), REG_BADRPT);
321*55847Sbostic 		/* FALLTHROUGH */
322*55847Sbostic 	default:
323*55847Sbostic 		ordinary(p, c);
324*55847Sbostic 		break;
325*55847Sbostic 	}
326*55847Sbostic 
327*55847Sbostic 	c = PEEK();
328*55847Sbostic 	if (!ISRPT(c, PEEK2()))
329*55847Sbostic 		return;		/* no repetition, we're done */
330*55847Sbostic 	NEXT();
331*55847Sbostic 
332*55847Sbostic 	REQUIRE(!wascaret, REG_BADRPT);
333*55847Sbostic 	switch (c) {
334*55847Sbostic 	case '*':	/* implemented as +? */
335*55847Sbostic 		INSERT(OPLUS_, pos);
336*55847Sbostic 		BACK(O_PLUS, pos);
337*55847Sbostic 		INSERT(OQUEST_, pos);
338*55847Sbostic 		BACK(O_QUEST, pos);
339*55847Sbostic 		break;
340*55847Sbostic 	case '+':
341*55847Sbostic 		INSERT(OPLUS_, pos);
342*55847Sbostic 		BACK(O_PLUS, pos);
343*55847Sbostic 		break;
344*55847Sbostic 	case '?':
345*55847Sbostic 		INSERT(OQUEST_, pos);
346*55847Sbostic 		BACK(O_QUEST, pos);
347*55847Sbostic 		break;
348*55847Sbostic 	case '{':
349*55847Sbostic 		count = p_count(p);
350*55847Sbostic 		if (EAT(',')) {
351*55847Sbostic 			if (isdigit(PEEK())) {
352*55847Sbostic 				count2 = p_count(p);
353*55847Sbostic 				REQUIRE(count <= count2, REG_BADBR);
354*55847Sbostic 			} else		/* single number with comma */
355*55847Sbostic 				count2 = INFINITY;
356*55847Sbostic 		} else		/* just a single number */
357*55847Sbostic 			count2 = count;
358*55847Sbostic 		repeat(p, pos, count, count2);
359*55847Sbostic 		if (!EAT('}')) {	/* error heuristics */
360*55847Sbostic 			while ((c = PEEK()) != '\0' && c != '}')
361*55847Sbostic 				NEXT();
362*55847Sbostic 			if (c == '\0')
363*55847Sbostic 				SETERROR(REG_EBRACE);
364*55847Sbostic 			else
365*55847Sbostic 				SETERROR(REG_BADBR);
366*55847Sbostic 		}
367*55847Sbostic 		break;
368*55847Sbostic 	}
369*55847Sbostic 
370*55847Sbostic 	c = PEEK();
371*55847Sbostic 	REQUIRE(!ISRPT(c, PEEK2()), REG_BADRPT);
372*55847Sbostic }
373*55847Sbostic 
374*55847Sbostic /*
375*55847Sbostic  - p_bre - BRE parser top level, anchoring and concatenation
376*55847Sbostic  *
377*55847Sbostic  * Giving end1 as '\0' essentially eliminates the end1/end2 check.
378*55847Sbostic  */
379*55847Sbostic static void
380*55847Sbostic p_bre(p, end1, end2)
381*55847Sbostic register struct parse *p;
382*55847Sbostic register uchar end1;		/* first terminating character */
383*55847Sbostic register uchar end2;		/* second terminating character */
384*55847Sbostic {
385*55847Sbostic 	STATIC int p_simp_re();
386*55847Sbostic 	register sopno start = HERE();
387*55847Sbostic 	register int first = 1;			/* first subexpression? */
388*55847Sbostic 	register int wasdollar;
389*55847Sbostic 
390*55847Sbostic 	if (EAT('^')) {
391*55847Sbostic 		EMIT(OBOL, 0);
392*55847Sbostic 		p->g->iflags |= USEBOL;
393*55847Sbostic 	}
394*55847Sbostic 	while (!SEE('\0') && !SEETWO(end1, end2)) {
395*55847Sbostic 		wasdollar = p_simp_re(p, first);
396*55847Sbostic 		first = 0;
397*55847Sbostic 	}
398*55847Sbostic 	if (wasdollar) {	/* oops, that was a trailing anchor */
399*55847Sbostic 		DROP(1);
400*55847Sbostic 		EMIT(OEOL, 0);
401*55847Sbostic 		p->g->iflags |= USEEOL;
402*55847Sbostic 	}
403*55847Sbostic 
404*55847Sbostic 	REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
405*55847Sbostic }
406*55847Sbostic 
407*55847Sbostic /*
408*55847Sbostic  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
409*55847Sbostic  */
410*55847Sbostic static int			/* was the simple RE an unbackslashed $? */
411*55847Sbostic p_simp_re(p, starordinary)
412*55847Sbostic register struct parse *p;
413*55847Sbostic int starordinary;		/* is a leading * an ordinary character? */
414*55847Sbostic {
415*55847Sbostic 	STATIC int p_count();
416*55847Sbostic 	STATIC void p_bracket();
417*55847Sbostic 	STATIC void ordinary();
418*55847Sbostic 	STATIC void nonnewline();
419*55847Sbostic 	STATIC void repeat();
420*55847Sbostic 	register int c;
421*55847Sbostic 	register int count;
422*55847Sbostic 	register int count2;
423*55847Sbostic 	register sopno pos;
424*55847Sbostic 	register sopno sub;
425*55847Sbostic 	register int i;
426*55847Sbostic 	register sopno subno;
427*55847Sbostic #	define	BACKSL	(1<<CHAR_BIT)
428*55847Sbostic 
429*55847Sbostic 	pos = HERE();		/* repetion op, if any, covers from here */
430*55847Sbostic 
431*55847Sbostic 	c = GETNEXT();
432*55847Sbostic 	assert(c != '\0');	/* caller should have ensured this */
433*55847Sbostic 	if (c == '\\')
434*55847Sbostic 		c = BACKSL | PEEK();
435*55847Sbostic 	switch (c) {
436*55847Sbostic 	case '.':
437*55847Sbostic 		if (p->g->cflags&REG_NEWLINE)
438*55847Sbostic 			nonnewline(p);
439*55847Sbostic 		else
440*55847Sbostic 			EMIT(OANY, 0);
441*55847Sbostic 		break;
442*55847Sbostic 	case '[':
443*55847Sbostic 		p_bracket(p);
444*55847Sbostic 		break;
445*55847Sbostic 	case BACKSL|'^':
446*55847Sbostic 	case BACKSL|'.':
447*55847Sbostic 	case BACKSL|'*':
448*55847Sbostic 	case BACKSL|'[':
449*55847Sbostic 	case BACKSL|'$':
450*55847Sbostic 	case BACKSL|'\\':
451*55847Sbostic 		ordinary(p, c&~BACKSL);
452*55847Sbostic 		NEXT();
453*55847Sbostic 		break;
454*55847Sbostic 	case BACKSL|'{':
455*55847Sbostic 		SETERROR(REG_BADRPT);
456*55847Sbostic 		break;
457*55847Sbostic 	case BACKSL|'(':
458*55847Sbostic 		NEXT();
459*55847Sbostic 		p->g->nsub++;
460*55847Sbostic 		subno = p->g->nsub;
461*55847Sbostic 		if (subno < NPAREN)
462*55847Sbostic 			p->pbegin[subno] = HERE();
463*55847Sbostic 		EMIT(OLPAREN, subno);
464*55847Sbostic 		/* the SEE here is an error heuristic */
465*55847Sbostic 		if (!SEE('\0') && !SEETWO('\\', ')'))
466*55847Sbostic 			p_bre(p, '\\', ')');
467*55847Sbostic 		if (subno < NPAREN) {
468*55847Sbostic 			p->pend[subno] = HERE();
469*55847Sbostic 			assert(p->pend[subno] != 0);
470*55847Sbostic 		}
471*55847Sbostic 		EMIT(ORPAREN, subno);
472*55847Sbostic 		REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
473*55847Sbostic 		break;
474*55847Sbostic 	case BACKSL|')':	/* should not get here -- must be user */
475*55847Sbostic 	case BACKSL|'}':
476*55847Sbostic 		SETERROR(REG_EPAREN);
477*55847Sbostic 		break;
478*55847Sbostic 	case BACKSL|'1':
479*55847Sbostic 	case BACKSL|'2':
480*55847Sbostic 	case BACKSL|'3':
481*55847Sbostic 	case BACKSL|'4':
482*55847Sbostic 	case BACKSL|'5':
483*55847Sbostic 	case BACKSL|'6':
484*55847Sbostic 	case BACKSL|'7':
485*55847Sbostic 	case BACKSL|'8':
486*55847Sbostic 	case BACKSL|'9':
487*55847Sbostic 		i = (c&~BACKSL) - '0';
488*55847Sbostic 		assert(i < NPAREN);
489*55847Sbostic 		if (p->pend[i] != 0) {
490*55847Sbostic 			assert(i <= p->g->nsub);
491*55847Sbostic 			EMIT(OBACK_, i);
492*55847Sbostic 			assert(p->pbegin[i] != 0);
493*55847Sbostic 			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
494*55847Sbostic 			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
495*55847Sbostic 			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
496*55847Sbostic 			EMIT(O_BACK, i);
497*55847Sbostic 		} else
498*55847Sbostic 			SETERROR(REG_ESUBREG);
499*55847Sbostic 		p->g->backrefs = 1;
500*55847Sbostic 		NEXT();
501*55847Sbostic 		break;
502*55847Sbostic 	case '*':
503*55847Sbostic 		REQUIRE(starordinary, REG_BADRPT);
504*55847Sbostic 		/* FALLTHROUGH */
505*55847Sbostic 	default:
506*55847Sbostic 		if (c & BACKSL) {
507*55847Sbostic 			SETERROR(REG_EESCAPE);
508*55847Sbostic 			c &= ~BACKSL;
509*55847Sbostic 		}
510*55847Sbostic 		ordinary(p, (uchar)c);
511*55847Sbostic 		break;
512*55847Sbostic 	}
513*55847Sbostic 
514*55847Sbostic 	if (EAT('*')) {		/* implemented as +? */
515*55847Sbostic 		INSERT(OPLUS_, pos);
516*55847Sbostic 		BACK(O_PLUS, pos);
517*55847Sbostic 		INSERT(OQUEST_, pos);
518*55847Sbostic 		BACK(O_QUEST, pos);
519*55847Sbostic 	} else if (EATTWO('\\', '{')) {
520*55847Sbostic 		count = p_count(p);
521*55847Sbostic 		if (EAT(',')) {
522*55847Sbostic 			if (isdigit(PEEK())) {
523*55847Sbostic 				count2 = p_count(p);
524*55847Sbostic 				REQUIRE(count <= count2, REG_BADBR);
525*55847Sbostic 			} else		/* single number with comma */
526*55847Sbostic 				count2 = INFINITY;
527*55847Sbostic 		} else		/* just a single number */
528*55847Sbostic 			count2 = count;
529*55847Sbostic 		repeat(p, pos, count, count2);
530*55847Sbostic 		if (!EATTWO('\\', '}')) {	/* error heuristics */
531*55847Sbostic 			while (!SEE('\0') && !SEETWO('\\', '}'))
532*55847Sbostic 				NEXT();
533*55847Sbostic 			if (SEE('\0'))
534*55847Sbostic 				SETERROR(REG_EBRACE);
535*55847Sbostic 			else
536*55847Sbostic 				SETERROR(REG_BADBR);
537*55847Sbostic 		}
538*55847Sbostic 	} else if (c == '$')	/* unbackslashed $ not followed by reptn */
539*55847Sbostic 		return(1);
540*55847Sbostic 
541*55847Sbostic 	return(0);
542*55847Sbostic }
543*55847Sbostic 
544*55847Sbostic /*
545*55847Sbostic  - p_count - parse a repetition count
546*55847Sbostic  */
547*55847Sbostic static int			/* the value */
548*55847Sbostic p_count(p)
549*55847Sbostic register struct parse *p;
550*55847Sbostic {
551*55847Sbostic 	register int count = 0;
552*55847Sbostic 	register int ndigits = 0;
553*55847Sbostic 
554*55847Sbostic 	while (isdigit(PEEK()) && count <= DUPMAX) {
555*55847Sbostic 		count = count*10 + (GETNEXT() - '0');
556*55847Sbostic 		ndigits++;
557*55847Sbostic 	}
558*55847Sbostic 
559*55847Sbostic 	REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
560*55847Sbostic 	return(count);
561*55847Sbostic }
562*55847Sbostic 
563*55847Sbostic /*
564*55847Sbostic  - p_bracket - parse a bracketed character list
565*55847Sbostic  *
566*55847Sbostic  * Note a significant property of this code:  if the allocset() did SETERROR,
567*55847Sbostic  * no set operations are done.
568*55847Sbostic  */
569*55847Sbostic static void
570*55847Sbostic p_bracket(p)
571*55847Sbostic register struct parse *p;
572*55847Sbostic {
573*55847Sbostic 	STATIC void p_b_term();
574*55847Sbostic 	STATIC cset *allocset();
575*55847Sbostic 	STATIC int freezeset();
576*55847Sbostic 	register uchar c;
577*55847Sbostic 	register cset *cs = allocset(p);
578*55847Sbostic 	register int invert = 0;
579*55847Sbostic 
580*55847Sbostic 	if (EAT('^'))
581*55847Sbostic 		invert++;	/* make note to invert set at end */
582*55847Sbostic 	if (EAT(']'))
583*55847Sbostic 		CHadd(cs, ']');
584*55847Sbostic 	while ((c = PEEK()) != '\0' && c != ']' && !SEETWO('-', ']'))
585*55847Sbostic 		p_b_term(p, cs);
586*55847Sbostic 	if (EAT('-'))
587*55847Sbostic 		CHadd(cs, '-');
588*55847Sbostic 	MUSTEAT(']', REG_EBRACK);
589*55847Sbostic 
590*55847Sbostic 	if (invert && p->error == 0) {
591*55847Sbostic 		register int i;
592*55847Sbostic 
593*55847Sbostic 		for (i = p->g->csetsize - 1; i >= 0; i--)
594*55847Sbostic 			if (CHIN(cs, i))
595*55847Sbostic 				CHsub(cs, i);
596*55847Sbostic 			else
597*55847Sbostic 				CHadd(cs, i);
598*55847Sbostic 		if (p->g->cflags&REG_NEWLINE)
599*55847Sbostic 			CHsub(cs, '\n');
600*55847Sbostic 		if (cs->multis != NULL)
601*55847Sbostic 			mcinvert(p, cs);
602*55847Sbostic 	}
603*55847Sbostic 	assert(cs->multis == NULL);		/* xxx */
604*55847Sbostic 	EMIT(OANYOF, freezeset(p, cs));
605*55847Sbostic }
606*55847Sbostic 
607*55847Sbostic /*
608*55847Sbostic  - p_b_term - parse one term of a bracketed character list
609*55847Sbostic  */
610*55847Sbostic static void
611*55847Sbostic p_b_term(p, cs)
612*55847Sbostic register struct parse *p;
613*55847Sbostic register cset *cs;
614*55847Sbostic {
615*55847Sbostic 	STATIC uchar p_b_symbol();
616*55847Sbostic 	STATIC void p_b_cclass();
617*55847Sbostic 	STATIC void p_b_eclass();
618*55847Sbostic 	STATIC uchar othercase();
619*55847Sbostic 	register uchar c;
620*55847Sbostic 	register uchar start, finish;
621*55847Sbostic 	register int i;
622*55847Sbostic 
623*55847Sbostic 	/* classify what we've got */
624*55847Sbostic 	switch (PEEK()) {
625*55847Sbostic 	case '[':
626*55847Sbostic 		c = PEEK2();
627*55847Sbostic 		break;
628*55847Sbostic 	case '-':
629*55847Sbostic 		SETERROR(REG_ERANGE);
630*55847Sbostic 		return;			/* NOTE RETURN */
631*55847Sbostic 		break;
632*55847Sbostic 	default:
633*55847Sbostic 		c = '\0';
634*55847Sbostic 		break;
635*55847Sbostic 	}
636*55847Sbostic 
637*55847Sbostic 	switch (c) {
638*55847Sbostic 	case ':':		/* character class */
639*55847Sbostic 		NEXT2();
640*55847Sbostic 		c = PEEK();
641*55847Sbostic 		REQUIRE(c != '\0', REG_EBRACK);
642*55847Sbostic 		REQUIRE(c != '-' && c != ']', REG_ECTYPE);
643*55847Sbostic 		p_b_cclass(p, cs);
644*55847Sbostic 		MUSTNOTSEE('\0', REG_EBRACK);
645*55847Sbostic 		REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
646*55847Sbostic 		break;
647*55847Sbostic 	case '=':		/* equivalence class */
648*55847Sbostic 		NEXT2();
649*55847Sbostic 		c = PEEK();
650*55847Sbostic 		REQUIRE(c != '\0', REG_EBRACK);
651*55847Sbostic 		REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
652*55847Sbostic 		p_b_eclass(p, cs);
653*55847Sbostic 		MUSTNOTSEE('\0', REG_EBRACK);
654*55847Sbostic 		REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
655*55847Sbostic 		break;
656*55847Sbostic 	default:		/* symbol, ordinary character, or range */
657*55847Sbostic /* xxx revision needed for multichar stuff */
658*55847Sbostic 		start = p_b_symbol(p);
659*55847Sbostic 		if (PEEK() == '-' && (c = PEEK2()) != ']' && c != '\0') {
660*55847Sbostic 			/* range */
661*55847Sbostic 			NEXT();
662*55847Sbostic 			if (EAT('-'))
663*55847Sbostic 				finish = '-';
664*55847Sbostic 			else
665*55847Sbostic 				finish = p_b_symbol(p);
666*55847Sbostic 		} else
667*55847Sbostic 			finish = start;
668*55847Sbostic 		REQUIRE(start <= finish, REG_ERANGE);
669*55847Sbostic 		for (i = start; i <= finish; i++) {
670*55847Sbostic 			CHadd(cs, i);
671*55847Sbostic 			if ((p->g->cflags&REG_ICASE) && isalpha(i)) {
672*55847Sbostic 				c = othercase((uchar)i);
673*55847Sbostic 				CHadd(cs, c);
674*55847Sbostic 			}
675*55847Sbostic 		}
676*55847Sbostic 		break;
677*55847Sbostic 	}
678*55847Sbostic }
679*55847Sbostic 
680*55847Sbostic /*
681*55847Sbostic  - p_b_cclass - parse a character-class name and deal with it
682*55847Sbostic  */
683*55847Sbostic static void
684*55847Sbostic p_b_cclass(p, cs)
685*55847Sbostic register struct parse *p;
686*55847Sbostic register cset *cs;
687*55847Sbostic {
688*55847Sbostic 	register uchar *sb = p->next;
689*55847Sbostic 	register uchar *se = sb;
690*55847Sbostic 	register struct cclass *cp;
691*55847Sbostic 	register int len;
692*55847Sbostic 	register uchar *u;
693*55847Sbostic 	register uchar c;
694*55847Sbostic 
695*55847Sbostic 	while (isalpha(*se))
696*55847Sbostic 		se++;
697*55847Sbostic 	len = se - sb;
698*55847Sbostic 	NEXTn(len);
699*55847Sbostic 	for (cp = cclasses; cp->name != NULL; cp++)
700*55847Sbostic 		if (strncmp(cp->name, (char *)sb, len) == 0 &&
701*55847Sbostic 		    cp->name[len] == '\0')
702*55847Sbostic 			break;
703*55847Sbostic 	if (cp->name == NULL) {
704*55847Sbostic 		/* oops, didn't find it */
705*55847Sbostic 		SETERROR(REG_ECTYPE);
706*55847Sbostic 		return;
707*55847Sbostic 	}
708*55847Sbostic 
709*55847Sbostic 	u = (uchar *)cp->chars;
710*55847Sbostic 	while ((c = *u++) != '\0')
711*55847Sbostic 		CHadd(cs, c);
712*55847Sbostic 	for (u = (uchar *)cp->multis; *u != '\0'; u += strlen((char *)u) + 1)
713*55847Sbostic 		MCadd(cs, u);
714*55847Sbostic }
715*55847Sbostic 
716*55847Sbostic /*
717*55847Sbostic  - p_b_eclass - parse an equivalence-class name and deal with it
718*55847Sbostic  *
719*55847Sbostic  * This implementation is incomplete. xxx
720*55847Sbostic  */
721*55847Sbostic static void
722*55847Sbostic p_b_eclass(p, cs)
723*55847Sbostic register struct parse *p;
724*55847Sbostic register cset *cs;
725*55847Sbostic {
726*55847Sbostic 	register uchar c;
727*55847Sbostic 	STATIC uchar p_b_coll_elem();
728*55847Sbostic 
729*55847Sbostic 	c = p_b_coll_elem(p, '=');
730*55847Sbostic 	CHadd(cs, c);
731*55847Sbostic }
732*55847Sbostic 
733*55847Sbostic /*
734*55847Sbostic  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
735*55847Sbostic  */
736*55847Sbostic static uchar			/* value of symbol */
737*55847Sbostic p_b_symbol(p)
738*55847Sbostic register struct parse *p;
739*55847Sbostic {
740*55847Sbostic 	STATIC uchar p_b_coll_elem();
741*55847Sbostic 	register uchar value;
742*55847Sbostic 
743*55847Sbostic 	if (!EATTWO('[', '.')) {
744*55847Sbostic 		MUSTNOTSEE('\0', REG_EBRACK);
745*55847Sbostic 		return(GETNEXT());
746*55847Sbostic 	}
747*55847Sbostic 
748*55847Sbostic 	/* collating symbol */
749*55847Sbostic 	MUSTNOTSEE('\0', REG_EBRACK);
750*55847Sbostic 	value = p_b_coll_elem(p, '.');
751*55847Sbostic 	REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
752*55847Sbostic 	return(value);
753*55847Sbostic }
754*55847Sbostic 
755*55847Sbostic /*
756*55847Sbostic  - p_b_coll_elem - parse a collating-element name and look it up
757*55847Sbostic  */
758*55847Sbostic static uchar			/* value of collating element */
759*55847Sbostic p_b_coll_elem(p, endc)
760*55847Sbostic register struct parse *p;
761*55847Sbostic uchar endc;			/* name ended by endc,']' */
762*55847Sbostic {
763*55847Sbostic 	register uchar *sp = p->next;
764*55847Sbostic 	register struct cname *cp;
765*55847Sbostic 	register int len;
766*55847Sbostic 	register uchar c;
767*55847Sbostic 
768*55847Sbostic 	while ((c = PEEK()) != '\0' && !SEETWO(endc, ']'))
769*55847Sbostic 		NEXT();
770*55847Sbostic 	if (c == '\0') {
771*55847Sbostic 		SETERROR(REG_EBRACK);
772*55847Sbostic 		return(0);
773*55847Sbostic 	}
774*55847Sbostic 	len = p->next - sp;
775*55847Sbostic 	for (cp = cnames; cp->name != NULL; cp++)
776*55847Sbostic 		if (strncmp(cp->name, (char *)sp, len) == 0 &&
777*55847Sbostic 		    cp->name[len] == '\0')
778*55847Sbostic 			return(cp->code);	/* known name */
779*55847Sbostic 	if (len == 1)
780*55847Sbostic 		return(*sp);	/* single character */
781*55847Sbostic 	SETERROR(REG_ECOLLATE);			/* neither */
782*55847Sbostic 	return(0);
783*55847Sbostic }
784*55847Sbostic 
785*55847Sbostic /*
786*55847Sbostic  - othercase - return the case counterpart of an alphabetic
787*55847Sbostic  */
788*55847Sbostic static uchar
789*55847Sbostic othercase(ch)
790*55847Sbostic uchar ch;
791*55847Sbostic {
792*55847Sbostic 	assert(isalpha(ch));
793*55847Sbostic 	if (isupper(ch))
794*55847Sbostic 		return(tolower(ch));
795*55847Sbostic 	else if (islower(ch))
796*55847Sbostic 		return(toupper(ch));
797*55847Sbostic 	else			/* peculiar, but could happen */
798*55847Sbostic 		return(ch);
799*55847Sbostic }
800*55847Sbostic 
801*55847Sbostic /*
802*55847Sbostic  - bothcases - emit a dualcase version of a character
803*55847Sbostic  * Boy, is this implementation ever a kludge...
804*55847Sbostic  */
805*55847Sbostic static void
806*55847Sbostic bothcases(p, ch)
807*55847Sbostic register struct parse *p;
808*55847Sbostic uchar ch;
809*55847Sbostic {
810*55847Sbostic 	register uchar *oldnext;
811*55847Sbostic 	uchar bracket[3];
812*55847Sbostic 
813*55847Sbostic 	oldnext = p->next;
814*55847Sbostic 	p->next = bracket;
815*55847Sbostic 	bracket[0] = ch;
816*55847Sbostic 	bracket[1] = ']';
817*55847Sbostic 	bracket[2] = '\0';
818*55847Sbostic 	p_bracket(p);
819*55847Sbostic 	assert(p->next == bracket+2);
820*55847Sbostic 	p->next = oldnext;
821*55847Sbostic }
822*55847Sbostic 
823*55847Sbostic /*
824*55847Sbostic  - ordinary - emit an ordinary character
825*55847Sbostic  */
826*55847Sbostic static void
827*55847Sbostic ordinary(p, ch)
828*55847Sbostic register struct parse *p;
829*55847Sbostic register uchar ch;
830*55847Sbostic {
831*55847Sbostic 	register uchar *cap = p->g->categories;
832*55847Sbostic 
833*55847Sbostic 	if ((p->g->cflags&REG_ICASE) && isalpha(ch)) {
834*55847Sbostic 		bothcases(p, ch);
835*55847Sbostic 		return;
836*55847Sbostic 	}
837*55847Sbostic 
838*55847Sbostic 	EMIT(OCHAR, ch);
839*55847Sbostic 	if (cap[ch] == 0)
840*55847Sbostic 		cap[ch] = p->g->ncategories++;
841*55847Sbostic }
842*55847Sbostic 
843*55847Sbostic /*
844*55847Sbostic  - nonnewline - emit REG_NEWLINE version of OANY
845*55847Sbostic  * Boy, is this implementation ever a kludge...
846*55847Sbostic  */
847*55847Sbostic static void
848*55847Sbostic nonnewline(p)
849*55847Sbostic register struct parse *p;
850*55847Sbostic {
851*55847Sbostic 	register uchar *oldnext;
852*55847Sbostic 	uchar bracket[4];
853*55847Sbostic 
854*55847Sbostic 	oldnext = p->next;
855*55847Sbostic 	p->next = bracket;
856*55847Sbostic 	bracket[0] = '^';
857*55847Sbostic 	bracket[1] = '\n';
858*55847Sbostic 	bracket[2] = ']';
859*55847Sbostic 	bracket[3] = '\0';
860*55847Sbostic 	p_bracket(p);
861*55847Sbostic 	assert(p->next == bracket+3);
862*55847Sbostic 	p->next = oldnext;
863*55847Sbostic }
864*55847Sbostic 
865*55847Sbostic /*
866*55847Sbostic  - repeat - generate code for a bounded repetition, recursively if needed
867*55847Sbostic  */
868*55847Sbostic static void
869*55847Sbostic repeat(p, start, from, to)
870*55847Sbostic register struct parse *p;
871*55847Sbostic sopno start;			/* operand from here to end of strip */
872*55847Sbostic int from;			/* repeated from this number */
873*55847Sbostic int to;				/* to this number of times (maybe INFINITY) */
874*55847Sbostic {
875*55847Sbostic 	register sopno finish = HERE();
876*55847Sbostic #	define	N	2
877*55847Sbostic #	define	INF	3
878*55847Sbostic #	define	REP(f, t)	((f)*8 + (t))
879*55847Sbostic #	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
880*55847Sbostic 	register sopno copy;
881*55847Sbostic 
882*55847Sbostic 	if (p->error != 0)	/* head off possible runaway recursion */
883*55847Sbostic 		return;
884*55847Sbostic 
885*55847Sbostic 	assert(from <= to);
886*55847Sbostic 
887*55847Sbostic 	switch (REP(MAP(from), MAP(to))) {
888*55847Sbostic 	case REP(0, 0):			/* must be user doing this */
889*55847Sbostic 		DROP(finish-start);	/* drop the operand */
890*55847Sbostic 		break;
891*55847Sbostic 	case REP(0, 1):			/* as x{1,1}? */
892*55847Sbostic 	case REP(0, N):			/* as x{1,n}? */
893*55847Sbostic 	case REP(0, INF):		/* as x{1,}? */
894*55847Sbostic 		INSERT(OQUEST_, start);		/* offset is wrong... */
895*55847Sbostic 		repeat(p, start+1, 1, to);
896*55847Sbostic 		FWD(start);			/* ... fix it */
897*55847Sbostic 		BACK(O_QUEST, start);
898*55847Sbostic 		break;
899*55847Sbostic 	case REP(1, 1):			/* trivial case */
900*55847Sbostic 		/* done */
901*55847Sbostic 		break;
902*55847Sbostic 	case REP(1, N):			/* as x?x{1,n-1} */
903*55847Sbostic 		INSERT(OQUEST_, start);
904*55847Sbostic 		BACK(O_QUEST, start);
905*55847Sbostic 		copy = dupl(p, start+1, finish+1);
906*55847Sbostic 		assert(copy == finish+2);
907*55847Sbostic 		repeat(p, copy, 1, to-1);
908*55847Sbostic 		break;
909*55847Sbostic 	case REP(1, INF):		/* as x+ */
910*55847Sbostic 		INSERT(OPLUS_, start);
911*55847Sbostic 		BACK(O_PLUS, start);
912*55847Sbostic 		break;
913*55847Sbostic 	case REP(N, N):			/* as xx{m-1,n-1} */
914*55847Sbostic 		copy = dupl(p, start, finish);
915*55847Sbostic 		repeat(p, copy, from-1, to-1);
916*55847Sbostic 		break;
917*55847Sbostic 	case REP(N, INF):		/* as xx{n-1,INF} */
918*55847Sbostic 		copy = dupl(p, start, finish);
919*55847Sbostic 		repeat(p, copy, from-1, to);
920*55847Sbostic 		break;
921*55847Sbostic 	default:			/* "can't happen" */
922*55847Sbostic 		SETERROR(REG_ASSERT);	/* just in case */
923*55847Sbostic 		break;
924*55847Sbostic 	}
925*55847Sbostic }
926*55847Sbostic 
927*55847Sbostic /*
928*55847Sbostic  - seterr - set an error condition
929*55847Sbostic  */
930*55847Sbostic static int			/* useless but makes type checking happy */
931*55847Sbostic seterr(p, e)
932*55847Sbostic register struct parse *p;
933*55847Sbostic int e;
934*55847Sbostic {
935*55847Sbostic 	if (p->error == 0)	/* keep earliest error condition */
936*55847Sbostic 		p->error = e;
937*55847Sbostic 	p->next = nuls;		/* try to bring things to a halt */
938*55847Sbostic 	return(0);		/* make the return value well-defined */
939*55847Sbostic }
940*55847Sbostic 
941*55847Sbostic /*
942*55847Sbostic  - allocset - allocate a set of characters for []
943*55847Sbostic  */
944*55847Sbostic static cset *
945*55847Sbostic allocset(p)
946*55847Sbostic register struct parse *p;
947*55847Sbostic {
948*55847Sbostic 	register int no = p->g->ncsets++;
949*55847Sbostic 	register size_t nc;
950*55847Sbostic 	register size_t nbytes;
951*55847Sbostic 	register cset *cs;
952*55847Sbostic 	register int i;
953*55847Sbostic 	register size_t css = (size_t)p->g->csetsize;
954*55847Sbostic 
955*55847Sbostic 	if (no >= p->ncsalloc) {	/* need another column of space */
956*55847Sbostic 		p->ncsalloc += CHAR_BIT;
957*55847Sbostic 		nc = p->ncsalloc;
958*55847Sbostic 		assert(nc % CHAR_BIT == 0);
959*55847Sbostic 		nbytes = nc / CHAR_BIT * css;
960*55847Sbostic 		if (p->g->sets == NULL)
961*55847Sbostic 			p->g->sets = (cset *)malloc(nc * sizeof(cset));
962*55847Sbostic 		else
963*55847Sbostic 			p->g->sets = (cset *)realloc((char *)p->g->sets,
964*55847Sbostic 							nc * sizeof(cset));
965*55847Sbostic 		if (p->g->setbits == NULL)
966*55847Sbostic 			p->g->setbits = (uchar *)malloc(nbytes);
967*55847Sbostic 		else
968*55847Sbostic 			p->g->setbits = (uchar *)realloc((char *)p->g->setbits,
969*55847Sbostic 								nbytes);
970*55847Sbostic 		if (p->g->sets != NULL && p->g->setbits != NULL)
971*55847Sbostic 			(void) memset((char *)p->g->setbits + (nbytes - css),
972*55847Sbostic 								0, css);
973*55847Sbostic 		else {
974*55847Sbostic 			no = 0;
975*55847Sbostic 			SETERROR(REG_ESPACE);
976*55847Sbostic 			/* caller's responsibility not to do set ops */
977*55847Sbostic 		}
978*55847Sbostic 	}
979*55847Sbostic 
980*55847Sbostic 	assert(p->g->sets != NULL);	/* xxx */
981*55847Sbostic 	cs = &p->g->sets[no];
982*55847Sbostic 	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
983*55847Sbostic 	cs->mask = 1 << ((no) % CHAR_BIT);
984*55847Sbostic 	cs->hash = 0;
985*55847Sbostic 	cs->smultis = 0;
986*55847Sbostic 	cs->multis = NULL;
987*55847Sbostic 
988*55847Sbostic 	return(cs);
989*55847Sbostic }
990*55847Sbostic 
991*55847Sbostic /*
992*55847Sbostic  - freezeset - final processing on a set of characters
993*55847Sbostic  *
994*55847Sbostic  * The main task here is merging identical sets.  This is usually a waste
995*55847Sbostic  * of time (although the hash code minimizes the overhead), but can win
996*55847Sbostic  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
997*55847Sbostic  * is done using addition rather than xor -- all ASCII [aA] sets xor to
998*55847Sbostic  * the same value!
999*55847Sbostic  */
1000*55847Sbostic static int			/* set number */
1001*55847Sbostic freezeset(p, cs)
1002*55847Sbostic register struct parse *p;
1003*55847Sbostic register cset *cs;
1004*55847Sbostic {
1005*55847Sbostic 	register uchar h = cs->hash;
1006*55847Sbostic 	register int i;
1007*55847Sbostic 	register cset *top = &p->g->sets[p->g->ncsets];
1008*55847Sbostic 	register uchar c;
1009*55847Sbostic 	register cset *cs2;
1010*55847Sbostic 	register size_t css = (size_t)p->g->csetsize;
1011*55847Sbostic 
1012*55847Sbostic 	/* look for an earlier one which is the same */
1013*55847Sbostic 	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1014*55847Sbostic 		if (cs2->hash == h && cs2 != cs) {
1015*55847Sbostic 			/* maybe */
1016*55847Sbostic 			for (i = 0; i < css; i++)
1017*55847Sbostic 				if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1018*55847Sbostic 					break;		/* no */
1019*55847Sbostic 			if (i == css)
1020*55847Sbostic 				break;			/* yes */
1021*55847Sbostic 		}
1022*55847Sbostic 
1023*55847Sbostic 	if (cs2 < top) {	/* found one */
1024*55847Sbostic 		assert(cs == top-1);
1025*55847Sbostic 		p->g->ncsets--;
1026*55847Sbostic 		for (i = 0; i < css; i++)
1027*55847Sbostic 			CHsub(cs, i);
1028*55847Sbostic 		cs = cs2;
1029*55847Sbostic 	}
1030*55847Sbostic 
1031*55847Sbostic 	return((int)(cs - p->g->sets));
1032*55847Sbostic }
1033*55847Sbostic 
1034*55847Sbostic /*
1035*55847Sbostic  - mcadd - add a collating element to a cset
1036*55847Sbostic  */
1037*55847Sbostic static void
1038*55847Sbostic mcadd(p, cs, cp)
1039*55847Sbostic register struct parse *p;
1040*55847Sbostic register cset *cs;
1041*55847Sbostic register uchar *cp;
1042*55847Sbostic {
1043*55847Sbostic 	register size_t oldend = cs->smultis;
1044*55847Sbostic 
1045*55847Sbostic 	cs->smultis += strlen((char *)cp) + 1;
1046*55847Sbostic 	if (cs->multis == NULL)
1047*55847Sbostic 		cs->multis = (uchar *)malloc(cs->smultis);
1048*55847Sbostic 	else
1049*55847Sbostic 		cs->multis = (uchar *)realloc(cs->multis, cs->smultis);
1050*55847Sbostic 	if (cs->multis == NULL) {
1051*55847Sbostic 		SETERROR(REG_ESPACE);
1052*55847Sbostic 		return;
1053*55847Sbostic 	}
1054*55847Sbostic 
1055*55847Sbostic 	(void) strcpy((char *)(cs->multis + oldend - 1), (char *)cp);
1056*55847Sbostic 	cs->multis[cs->smultis - 1] = '\0';
1057*55847Sbostic }
1058*55847Sbostic 
1059*55847Sbostic /*
1060*55847Sbostic  - mcsub - subtract a collating element from a cset
1061*55847Sbostic  */
1062*55847Sbostic static void
1063*55847Sbostic mcsub(p, cs, cp)
1064*55847Sbostic register struct parse *p;
1065*55847Sbostic register cset *cs;
1066*55847Sbostic register uchar *cp;
1067*55847Sbostic {
1068*55847Sbostic 	register uchar *fp = mcfind(cs, cp);
1069*55847Sbostic 	register size_t len = strlen((char *)fp);
1070*55847Sbostic 
1071*55847Sbostic 	assert(p != NULL);
1072*55847Sbostic 	(void) memmove((char *)fp, (char *)(fp + len + 1),
1073*55847Sbostic 				cs->smultis - (fp + len + 1 - cs->multis));
1074*55847Sbostic 	cs->smultis -= len;
1075*55847Sbostic 
1076*55847Sbostic 	if (cs->smultis == 0) {
1077*55847Sbostic 		free((char *)cs->multis);
1078*55847Sbostic 		cs->multis = NULL;
1079*55847Sbostic 		return;
1080*55847Sbostic 	}
1081*55847Sbostic 
1082*55847Sbostic 	cs->multis = (uchar *)realloc(cs->multis, cs->smultis);
1083*55847Sbostic 	assert(cs->multis != NULL);
1084*55847Sbostic }
1085*55847Sbostic 
1086*55847Sbostic /*
1087*55847Sbostic  - mcin - is a collating element in a cset?
1088*55847Sbostic  */
1089*55847Sbostic static int
1090*55847Sbostic mcin(p, cs, cp)
1091*55847Sbostic register struct parse *p;
1092*55847Sbostic register cset *cs;
1093*55847Sbostic register uchar *cp;
1094*55847Sbostic {
1095*55847Sbostic 	return(mcfind(cs, cp) != NULL);
1096*55847Sbostic }
1097*55847Sbostic 
1098*55847Sbostic /*
1099*55847Sbostic  - mcfind - find a collating element in a cset
1100*55847Sbostic  */
1101*55847Sbostic static uchar *
1102*55847Sbostic mcfind(cs, cp)
1103*55847Sbostic register cset *cs;
1104*55847Sbostic register uchar *cp;
1105*55847Sbostic {
1106*55847Sbostic 	register uchar *p;
1107*55847Sbostic 
1108*55847Sbostic 	if (cs->multis == NULL)
1109*55847Sbostic 		return(NULL);
1110*55847Sbostic 	for (p = cs->multis; *p != '\0'; p += strlen((char *)p) + 1)
1111*55847Sbostic 		if (strcmp((char *)cp, (char *)p) == 0)
1112*55847Sbostic 			return(p);
1113*55847Sbostic 	return(NULL);
1114*55847Sbostic }
1115*55847Sbostic 
1116*55847Sbostic /*
1117*55847Sbostic  - mcinvert - invert the list of collating elements in a cset
1118*55847Sbostic  *
1119*55847Sbostic  * This would have to know the set of possibilities.  Implementation
1120*55847Sbostic  * is deferred.
1121*55847Sbostic  */
1122*55847Sbostic static void
1123*55847Sbostic mcinvert(p, cs)
1124*55847Sbostic register struct parse *p;
1125*55847Sbostic register cset *cs;
1126*55847Sbostic {
1127*55847Sbostic 	assert(cs->multis == NULL);	/* xxx */
1128*55847Sbostic }
1129*55847Sbostic 
1130*55847Sbostic /*
1131*55847Sbostic  - isinsets - is this character in any sets?
1132*55847Sbostic  */
1133*55847Sbostic static int			/* predicate */
1134*55847Sbostic isinsets(g, c)
1135*55847Sbostic register struct re_guts *g;
1136*55847Sbostic uchar c;
1137*55847Sbostic {
1138*55847Sbostic 	register uchar *col;
1139*55847Sbostic 	register int i;
1140*55847Sbostic 	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1141*55847Sbostic 
1142*55847Sbostic 	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1143*55847Sbostic 		if (col[c] != 0)
1144*55847Sbostic 			return(1);
1145*55847Sbostic 	return(0);
1146*55847Sbostic }
1147*55847Sbostic 
1148*55847Sbostic /*
1149*55847Sbostic  - samesets - are these two characters in exactly the same sets?
1150*55847Sbostic  */
1151*55847Sbostic static int			/* predicate */
1152*55847Sbostic samesets(g, c1, c2)
1153*55847Sbostic register struct re_guts *g;
1154*55847Sbostic register uchar c1;
1155*55847Sbostic register uchar c2;
1156*55847Sbostic {
1157*55847Sbostic 	register uchar *col;
1158*55847Sbostic 	register int i;
1159*55847Sbostic 	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1160*55847Sbostic 
1161*55847Sbostic 	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1162*55847Sbostic 		if (col[c1] != col[c2])
1163*55847Sbostic 			return(0);
1164*55847Sbostic 	return(1);
1165*55847Sbostic }
1166*55847Sbostic 
1167*55847Sbostic /*
1168*55847Sbostic  - categorize - sort out character categories
1169*55847Sbostic  */
1170*55847Sbostic static void
1171*55847Sbostic categorize(p, g)
1172*55847Sbostic struct parse *p;
1173*55847Sbostic register struct re_guts *g;
1174*55847Sbostic {
1175*55847Sbostic 	register uchar *cats = g->categories;
1176*55847Sbostic 	register unsigned c;
1177*55847Sbostic 	register unsigned c2;
1178*55847Sbostic 	register uchar cat;
1179*55847Sbostic 
1180*55847Sbostic 	/* avoid making error situations worse */
1181*55847Sbostic 	if (p->error != 0)
1182*55847Sbostic 		return;
1183*55847Sbostic 
1184*55847Sbostic 	for (c = 0; c < g->csetsize; c++)
1185*55847Sbostic 		if (cats[c] == 0 && isinsets(g, c)) {
1186*55847Sbostic 			cat = g->ncategories++;
1187*55847Sbostic 			cats[c] = cat;
1188*55847Sbostic 			for (c2 = c+1; c2 < g->csetsize; c2++)
1189*55847Sbostic 				if (cats[c2] == 0 && samesets(g, c, c2))
1190*55847Sbostic 					cats[c2] = cat;
1191*55847Sbostic 		}
1192*55847Sbostic }
1193*55847Sbostic 
1194*55847Sbostic /*
1195*55847Sbostic  - dupl - emit a duplicate of a bunch of sops
1196*55847Sbostic  */
1197*55847Sbostic static sopno			/* start of duplicate */
1198*55847Sbostic dupl(p, start, finish)
1199*55847Sbostic register struct parse *p;
1200*55847Sbostic sopno start;			/* from here */
1201*55847Sbostic sopno finish;			/* to this less one */
1202*55847Sbostic {
1203*55847Sbostic 	register int i;
1204*55847Sbostic 	register sopno ret = HERE();
1205*55847Sbostic 	register sopno len = finish - start;
1206*55847Sbostic 
1207*55847Sbostic 	assert(finish >= start);
1208*55847Sbostic 	if (len == 0)
1209*55847Sbostic 		return(ret);
1210*55847Sbostic 	enlarge(p, p->ssize + len);	/* this many unexpected additions */
1211*55847Sbostic 	assert(p->ssize >= p->slen + len);
1212*55847Sbostic 	(void) memcpy((char *)(p->strip + p->slen),
1213*55847Sbostic 		(char *)(p->strip + start), (size_t)len*sizeof(sop));
1214*55847Sbostic 	p->slen += len;
1215*55847Sbostic 	return(ret);
1216*55847Sbostic }
1217*55847Sbostic 
1218*55847Sbostic /*
1219*55847Sbostic  - doemit - emit a strip operator
1220*55847Sbostic  *
1221*55847Sbostic  * It might seem better to implement this as a macro with a function as
1222*55847Sbostic  * hard-case backup, but it's just too big and messy unless there are
1223*55847Sbostic  * some changes to the data structures.  Maybe later.
1224*55847Sbostic  */
1225*55847Sbostic static void
1226*55847Sbostic doemit(p, op, opnd)
1227*55847Sbostic register struct parse *p;
1228*55847Sbostic sop op;
1229*55847Sbostic size_t opnd;
1230*55847Sbostic {
1231*55847Sbostic 	/* avoid making error situations worse */
1232*55847Sbostic 	if (p->error != 0)
1233*55847Sbostic 		return;
1234*55847Sbostic 
1235*55847Sbostic 	/* deal with oversize operands ("can't happen", more or less) */
1236*55847Sbostic 	assert(opnd < 1<<OPSHIFT);
1237*55847Sbostic 
1238*55847Sbostic 	/* deal with undersized strip */
1239*55847Sbostic 	if (p->slen >= p->ssize)
1240*55847Sbostic 		enlarge(p, (p->ssize+1) / 2 * 3);	/* +50% */
1241*55847Sbostic 	assert(p->slen < p->ssize);
1242*55847Sbostic 
1243*55847Sbostic 	/* finally, it's all reduced to the easy case */
1244*55847Sbostic 	p->strip[p->slen++] = SOP(op, opnd);
1245*55847Sbostic }
1246*55847Sbostic 
1247*55847Sbostic /*
1248*55847Sbostic  - doinsert - insert a sop into the strip
1249*55847Sbostic  */
1250*55847Sbostic static void
1251*55847Sbostic doinsert(p, op, opnd, pos)
1252*55847Sbostic register struct parse *p;
1253*55847Sbostic sop op;
1254*55847Sbostic size_t opnd;
1255*55847Sbostic sopno pos;
1256*55847Sbostic {
1257*55847Sbostic 	register sopno sn;
1258*55847Sbostic 	register sop s;
1259*55847Sbostic 	register int i;
1260*55847Sbostic 
1261*55847Sbostic 	/* avoid making error situations worse */
1262*55847Sbostic 	if (p->error != 0)
1263*55847Sbostic 		return;
1264*55847Sbostic 
1265*55847Sbostic 	sn = HERE();
1266*55847Sbostic 	EMIT(op, opnd);		/* do checks, ensure space */
1267*55847Sbostic 	assert(HERE() == sn+1);
1268*55847Sbostic 	s = p->strip[sn];
1269*55847Sbostic 
1270*55847Sbostic 	/* adjust paren pointers */
1271*55847Sbostic 	assert(pos > 0);
1272*55847Sbostic 	for (i = 1; i < NPAREN; i++) {
1273*55847Sbostic 		if (p->pbegin[i] >= pos) {
1274*55847Sbostic 			p->pbegin[i]++;
1275*55847Sbostic 		}
1276*55847Sbostic 		if (p->pend[i] >= pos) {
1277*55847Sbostic 			p->pend[i]++;
1278*55847Sbostic 		}
1279*55847Sbostic 	}
1280*55847Sbostic 
1281*55847Sbostic 	memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1282*55847Sbostic 						(HERE()-pos-1)*sizeof(sop));
1283*55847Sbostic 	p->strip[pos] = s;
1284*55847Sbostic }
1285*55847Sbostic 
1286*55847Sbostic /*
1287*55847Sbostic  - dofwd - complete a forward reference
1288*55847Sbostic  */
1289*55847Sbostic static void
1290*55847Sbostic dofwd(p, pos, value)
1291*55847Sbostic register struct parse *p;
1292*55847Sbostic register sopno pos;
1293*55847Sbostic sop value;
1294*55847Sbostic {
1295*55847Sbostic 	/* avoid making error situations worse */
1296*55847Sbostic 	if (p->error != 0)
1297*55847Sbostic 		return;
1298*55847Sbostic 
1299*55847Sbostic 	assert(value < 1<<OPSHIFT);
1300*55847Sbostic 	p->strip[pos] = OP(p->strip[pos]) | value;
1301*55847Sbostic }
1302*55847Sbostic 
1303*55847Sbostic /*
1304*55847Sbostic  - enlarge - enlarge the strip
1305*55847Sbostic  */
1306*55847Sbostic static void
1307*55847Sbostic enlarge(p, size)
1308*55847Sbostic register struct parse *p;
1309*55847Sbostic register sopno size;
1310*55847Sbostic {
1311*55847Sbostic 	register sop *sp;
1312*55847Sbostic 
1313*55847Sbostic 	if (p->ssize >= size)
1314*55847Sbostic 		return;
1315*55847Sbostic 
1316*55847Sbostic 	sp = (sop *)realloc(p->strip, size*sizeof(sop));
1317*55847Sbostic 	if (sp == NULL) {
1318*55847Sbostic 		SETERROR(REG_ESPACE);
1319*55847Sbostic 		return;
1320*55847Sbostic 	}
1321*55847Sbostic 	p->strip = sp;
1322*55847Sbostic 	p->ssize = size;
1323*55847Sbostic }
1324*55847Sbostic 
1325*55847Sbostic /*
1326*55847Sbostic  - stripsnug - compact the strip
1327*55847Sbostic  */
1328*55847Sbostic static void
1329*55847Sbostic stripsnug(p, g)
1330*55847Sbostic register struct parse *p;
1331*55847Sbostic register struct re_guts *g;
1332*55847Sbostic {
1333*55847Sbostic 	g->nstates = p->slen;
1334*55847Sbostic 	g->strip = (sop *)realloc((sop *)p->strip, p->slen * sizeof(sop));
1335*55847Sbostic 	if (g->strip == NULL) {
1336*55847Sbostic 		SETERROR(REG_ESPACE);
1337*55847Sbostic 		g->strip = p->strip;
1338*55847Sbostic 	}
1339*55847Sbostic }
1340*55847Sbostic 
1341*55847Sbostic /*
1342*55847Sbostic  - findmust - fill in must and mlen with longest mandatory literal string
1343*55847Sbostic  *
1344*55847Sbostic  * This algorithm could do fancy things like analyzing the operands of |
1345*55847Sbostic  * for common subsequences.  Someday.  This code is simple and finds most
1346*55847Sbostic  * of the interesting cases.
1347*55847Sbostic  *
1348*55847Sbostic  * Note that must and mlen got initialized during setup.
1349*55847Sbostic  */
1350*55847Sbostic STATIC void
1351*55847Sbostic findmust(p, g)
1352*55847Sbostic struct parse *p;
1353*55847Sbostic register struct re_guts *g;
1354*55847Sbostic {
1355*55847Sbostic 	register sop *scan;
1356*55847Sbostic 	sop *start;
1357*55847Sbostic 	register sop *newstart;
1358*55847Sbostic 	register sopno newlen;
1359*55847Sbostic 	register sop s;
1360*55847Sbostic 	register char *cp;
1361*55847Sbostic 	register sopno i;
1362*55847Sbostic 
1363*55847Sbostic 	/* avoid making error situations worse */
1364*55847Sbostic 	if (p->error != 0)
1365*55847Sbostic 		return;
1366*55847Sbostic 
1367*55847Sbostic 	/* find the longest OCHAR sequence in strip */
1368*55847Sbostic 	newlen = 0;
1369*55847Sbostic 	scan = g->strip + 1;
1370*55847Sbostic 	do {
1371*55847Sbostic 		s = *scan++;
1372*55847Sbostic 		switch (OP(s)) {
1373*55847Sbostic 		case OCHAR:		/* sequence member */
1374*55847Sbostic 			if (newlen == 0)		/* new sequence */
1375*55847Sbostic 				newstart = scan - 1;
1376*55847Sbostic 			newlen++;
1377*55847Sbostic 			break;
1378*55847Sbostic 		case OPLUS_:		/* things that don't break one */
1379*55847Sbostic 		case OLPAREN:
1380*55847Sbostic 		case ORPAREN:
1381*55847Sbostic 			break;
1382*55847Sbostic 		case OQUEST_:		/* things that must be skipped */
1383*55847Sbostic 		case OCH_:
1384*55847Sbostic 			scan--;
1385*55847Sbostic 			do {
1386*55847Sbostic 				scan += OPND(s);
1387*55847Sbostic 				s = *scan;
1388*55847Sbostic 				/* assert() interferes w debug printouts */
1389*55847Sbostic 				if (OP(s) != O_QUEST && OP(s) != O_CH &&
1390*55847Sbostic 							OP(s) != OOR2) {
1391*55847Sbostic 					g->iflags |= BAD;
1392*55847Sbostic 					return;
1393*55847Sbostic 				}
1394*55847Sbostic 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
1395*55847Sbostic 			/* fallthrough */
1396*55847Sbostic 		default:		/* things that break a sequence */
1397*55847Sbostic 			if (newlen > g->mlen) {		/* ends one */
1398*55847Sbostic 				start = newstart;
1399*55847Sbostic 				g->mlen = newlen;
1400*55847Sbostic 			}
1401*55847Sbostic 			newlen = 0;
1402*55847Sbostic 			break;
1403*55847Sbostic 		}
1404*55847Sbostic 	} while (OP(s) != OEND);
1405*55847Sbostic 
1406*55847Sbostic 	if (g->mlen == 0)		/* there isn't one */
1407*55847Sbostic 		return;
1408*55847Sbostic 
1409*55847Sbostic 	/* turn it into a character string */
1410*55847Sbostic 	g->must = malloc((size_t)g->mlen + 1);
1411*55847Sbostic 	if (g->must == NULL) {		/* argh; just forget it */
1412*55847Sbostic 		g->mlen = 0;
1413*55847Sbostic 		return;
1414*55847Sbostic 	}
1415*55847Sbostic 	cp = g->must;
1416*55847Sbostic 	scan = start;
1417*55847Sbostic 	for (i = g->mlen; i > 0; i--) {
1418*55847Sbostic 		while (OP(s = *scan++) != OCHAR)
1419*55847Sbostic 			continue;
1420*55847Sbostic 		*cp++ = OPND(s);
1421*55847Sbostic 	}
1422*55847Sbostic 	*cp++ = '\0';		/* just on general principles */
1423*55847Sbostic }
1424*55847Sbostic 
1425*55847Sbostic /*
1426*55847Sbostic  - pluscount - count + nesting
1427*55847Sbostic  */
1428*55847Sbostic STATIC sopno			/* nesting depth */
1429*55847Sbostic pluscount(p, g)
1430*55847Sbostic struct parse *p;
1431*55847Sbostic register struct re_guts *g;
1432*55847Sbostic {
1433*55847Sbostic 	register sop *scan;
1434*55847Sbostic 	register sop s;
1435*55847Sbostic 	register sopno plusnest = 0;
1436*55847Sbostic 	register sopno maxnest = 0;
1437*55847Sbostic 
1438*55847Sbostic 	if (p->error != 0)
1439*55847Sbostic 		return(0);	/* there may not be an OEND */
1440*55847Sbostic 
1441*55847Sbostic 	scan = g->strip + 1;
1442*55847Sbostic 	do {
1443*55847Sbostic 		s = *scan++;
1444*55847Sbostic 		switch (OP(s)) {
1445*55847Sbostic 		case OPLUS_:
1446*55847Sbostic 			plusnest++;
1447*55847Sbostic 			break;
1448*55847Sbostic 		case O_PLUS:
1449*55847Sbostic 			if (plusnest > maxnest)
1450*55847Sbostic 				maxnest = plusnest;
1451*55847Sbostic 			plusnest--;
1452*55847Sbostic 			break;
1453*55847Sbostic 		}
1454*55847Sbostic 	} while (OP(s) != OEND);
1455*55847Sbostic 	if (plusnest != 0)
1456*55847Sbostic 		g->iflags |= BAD;
1457*55847Sbostic 	return(maxnest);
1458*55847Sbostic }
1459