xref: /csrg-svn/lib/libc/regex/regcomp.c (revision 66406)
155847Sbostic /*-
266362Sbostic  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
366362Sbostic  * Copyright (c) 1992, 1993, 1994
461162Sbostic  *	The Regents of the University of California.  All rights reserved.
555847Sbostic  *
655847Sbostic  * This code is derived from software contributed to Berkeley by
7*66406Sbostic  * Henry Spencer.
855847Sbostic  *
955847Sbostic  * %sccs.include.redist.c%
1055847Sbostic  *
11*66406Sbostic  *	@(#)regcomp.c	8.5 (Berkeley) 03/20/94
1255847Sbostic  */
1355847Sbostic 
1455847Sbostic #if defined(LIBC_SCCS) && !defined(lint)
15*66406Sbostic static char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 03/20/94";
1655847Sbostic #endif /* LIBC_SCCS and not lint */
1755847Sbostic 
1855847Sbostic #include <sys/types.h>
1955847Sbostic #include <stdio.h>
2055847Sbostic #include <string.h>
2155847Sbostic #include <ctype.h>
2255847Sbostic #include <limits.h>
2355847Sbostic #include <stdlib.h>
2455847Sbostic #include <regex.h>
2555847Sbostic 
2655847Sbostic #include "utils.h"
2755847Sbostic #include "regex2.h"
2855847Sbostic 
2955847Sbostic #include "cclass.h"
3055847Sbostic #include "cname.h"
3155847Sbostic 
3255847Sbostic /*
3355847Sbostic  * parse structure, passed up and down to avoid global variables and
3455847Sbostic  * other clumsinesses
3555847Sbostic  */
3655847Sbostic struct parse {
3760201Sbostic 	char *next;		/* next character in RE */
3860201Sbostic 	char *end;		/* end of string (-> NUL normally) */
3955847Sbostic 	int error;		/* has an error been seen? */
4055847Sbostic 	sop *strip;		/* malloced strip */
4155847Sbostic 	sopno ssize;		/* malloced strip size (allocated) */
4255847Sbostic 	sopno slen;		/* malloced strip length (used) */
4355847Sbostic 	int ncsalloc;		/* number of csets allocated */
4455847Sbostic 	struct re_guts *g;
4555847Sbostic #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
4655847Sbostic 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
4755847Sbostic 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
4855847Sbostic };
4955847Sbostic 
5066362Sbostic /* ========= begin header generated by ./mkh ========= */
5166362Sbostic #ifdef __cplusplus
5266362Sbostic extern "C" {
5366362Sbostic #endif
5456355Sbostic 
5566362Sbostic /* === regcomp.c === */
5666385Sbostic static void p_ere __P((struct parse *p, int stop));
5766385Sbostic static void p_ere_exp __P((struct parse *p));
5866385Sbostic static void p_str __P((struct parse *p));
5966385Sbostic static void p_bre __P((struct parse *p, int end1, int end2));
6066385Sbostic static int p_simp_re __P((struct parse *p, int starordinary));
6166385Sbostic static int p_count __P((struct parse *p));
6266385Sbostic static void p_bracket __P((struct parse *p));
6366385Sbostic static void p_b_term __P((struct parse *p, cset *cs));
6466385Sbostic static void p_b_cclass __P((struct parse *p, cset *cs));
6566385Sbostic static void p_b_eclass __P((struct parse *p, cset *cs));
6666385Sbostic static char p_b_symbol __P((struct parse *p));
6766385Sbostic static char p_b_coll_elem __P((struct parse *p, int endc));
6866385Sbostic static char othercase __P((int ch));
6966385Sbostic static void bothcases __P((struct parse *p, int ch));
7066385Sbostic static void ordinary __P((struct parse *p, int ch));
7166385Sbostic static void nonnewline __P((struct parse *p));
7266385Sbostic static void repeat __P((struct parse *p, sopno start, int from, int to));
7366385Sbostic static int seterr __P((struct parse *p, int e));
7466385Sbostic static cset *allocset __P((struct parse *p));
7566385Sbostic static void freeset __P((struct parse *p, cset *cs));
7666385Sbostic static int freezeset __P((struct parse *p, cset *cs));
7766385Sbostic static int firstch __P((struct parse *p, cset *cs));
7866385Sbostic static int nch __P((struct parse *p, cset *cs));
7966385Sbostic static void mcadd __P((struct parse *p, cset *cs, char *cp));
8066385Sbostic static void mcsub __P((cset *cs, char *cp));
8166385Sbostic static int mcin __P((cset *cs, char *cp));
8266385Sbostic static char *mcfind __P((cset *cs, char *cp));
8366385Sbostic static void mcinvert __P((struct parse *p, cset *cs));
8466385Sbostic static void mccase __P((struct parse *p, cset *cs));
8566385Sbostic static int isinsets __P((struct re_guts *g, int c));
8666385Sbostic static int samesets __P((struct re_guts *g, int c1, int c2));
8766385Sbostic static void categorize __P((struct parse *p, struct re_guts *g));
8866385Sbostic static sopno dupl __P((struct parse *p, sopno start, sopno finish));
8966385Sbostic static void doemit __P((struct parse *p, sop op, size_t opnd));
9066385Sbostic static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
9166385Sbostic static void dofwd __P((struct parse *p, sopno pos, sop value));
9266385Sbostic static void enlarge __P((struct parse *p, sopno size));
9366385Sbostic static void stripsnug __P((struct parse *p, struct re_guts *g));
9466385Sbostic static void findmust __P((struct parse *p, struct re_guts *g));
9566385Sbostic static sopno pluscount __P((struct parse *p, struct re_guts *g));
9660201Sbostic 
9766362Sbostic #ifdef __cplusplus
9866362Sbostic }
9966362Sbostic #endif
10066362Sbostic /* ========= end header generated by ./mkh ========= */
10166362Sbostic 
static char nuls[10];		/* place to point scanner in event of error */

/*
 * macros for use with parse structure
 * BEWARE:  these know that the parse structure is named `p' !!!
 *
 * Scanner access: PEEK/PEEK2 read the next one or two characters without
 * consuming them, MORE/MORE2 say whether that many characters remain before
 * p->end, SEE/SEETWO combine the two, and EAT/EATTWO consume the
 * character(s) only when they match (yielding 1, else 0).
 */
#define	PEEK()	(*p->next)
#define	PEEK2()	(*(p->next+1))
#define	MORE()	(p->next < p->end)
#define	MORE2()	(p->next+1 < p->end)
#define	SEE(c)	(MORE() && PEEK() == (c))
#define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
#define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
#define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
#define	NEXT()	(p->next++)
#define	NEXT2()	(p->next += 2)
#define	NEXTn(n)	(p->next += (n))
#define	GETNEXT()	(*p->next++)

/*
 * Error reporting: REQUIRE records error `e' through seterr() when the
 * condition `co' is false.  Note that MUSTEAT consumes a character whenever
 * one is available, whether or not it matches.
 */
#define	SETERROR(e)	seterr(p, (e))
#define	REQUIRE(co, e)	((co) || SETERROR(e))
#define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
#define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
#define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))

/*
 * Strip manipulation.  HERE() is the index of the next free strip slot,
 * THERE() the last slot used, THERETHERE() the one before that.  ASTERN
 * emits an operator whose operand is the distance back to `pos'; AHEAD
 * patches the operator at `pos' with the distance forward to HERE().
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. ASTERN(op, a - b)) are evaluated as a unit.
 */
#define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
#define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, (pos))
#define	AHEAD(pos)		dofwd(p, (pos), HERE()-(pos))
#define	ASTERN(op, pos)	EMIT(op, HERE()-(pos))
#define	HERE()		(p->slen)
#define	THERE()		(p->slen - 1)
#define	THERETHERE()	(p->slen - 2)
#define	DROP(n)	(p->slen -= (n))

#ifndef NDEBUG
static int never = 0;		/* for use in asserts; shuts lint up */
#else
#define	never	0		/* some <assert.h>s have bugs too */
#endif
13956357Sbostic 
14055847Sbostic /*
14155847Sbostic  - regcomp - interface for parser and compilation
14266362Sbostic  = extern int regcomp(regex_t *, const char *, int);
14360201Sbostic  = #define	REG_BASIC	0000
14460201Sbostic  = #define	REG_EXTENDED	0001
14560201Sbostic  = #define	REG_ICASE	0002
14660201Sbostic  = #define	REG_NOSUB	0004
14760201Sbostic  = #define	REG_NEWLINE	0010
14860201Sbostic  = #define	REG_NOSPEC	0020
14960201Sbostic  = #define	REG_PEND	0040
15060201Sbostic  = #define	REG_DUMP	0200
15155847Sbostic  */
15255847Sbostic int				/* 0 success, otherwise REG_something */
regcomp(preg,pattern,cflags)15355847Sbostic regcomp(preg, pattern, cflags)
15455847Sbostic regex_t *preg;
15555847Sbostic const char *pattern;
15655847Sbostic int cflags;
15755847Sbostic {
15855847Sbostic 	struct parse pa;
15955847Sbostic 	register struct re_guts *g;
16055847Sbostic 	register struct parse *p = &pa;
16155847Sbostic 	register int i;
16260201Sbostic 	register size_t len;
16366362Sbostic #ifdef REDEBUG
16466362Sbostic #	define	GOODFLAGS(f)	(f)
16566362Sbostic #else
16666362Sbostic #	define	GOODFLAGS(f)	((f)&~REG_DUMP)
16766362Sbostic #endif
16855847Sbostic 
16966362Sbostic 	cflags = GOODFLAGS(cflags);
17060201Sbostic 	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
17160201Sbostic 		return(REG_INVARG);
17260201Sbostic 
17360201Sbostic 	if (cflags&REG_PEND) {
17460201Sbostic 		if (preg->re_endp < pattern)
17560201Sbostic 			return(REG_INVARG);
17660201Sbostic 		len = preg->re_endp - pattern;
17760201Sbostic 	} else
17860201Sbostic 		len = strlen((char *)pattern);
17960201Sbostic 
18055847Sbostic 	/* do the mallocs early so failure handling is easy */
18160201Sbostic 	g = (struct re_guts *)malloc(sizeof(struct re_guts) +
18260201Sbostic 							(NC-1)*sizeof(cat_t));
18355847Sbostic 	if (g == NULL)
18455847Sbostic 		return(REG_ESPACE);
18560201Sbostic 	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
18655847Sbostic 	p->strip = (sop *)malloc(p->ssize * sizeof(sop));
18755847Sbostic 	p->slen = 0;
18855847Sbostic 	if (p->strip == NULL) {
18955847Sbostic 		free((char *)g);
19055847Sbostic 		return(REG_ESPACE);
19155847Sbostic 	}
19255847Sbostic 
19355847Sbostic 	/* set things up */
19455847Sbostic 	p->g = g;
19560201Sbostic 	p->next = (char *)pattern;	/* convenience; we do not modify it */
19660201Sbostic 	p->end = p->next + len;
19755847Sbostic 	p->error = 0;
19855847Sbostic 	p->ncsalloc = 0;
19955847Sbostic 	for (i = 0; i < NPAREN; i++) {
20055847Sbostic 		p->pbegin[i] = 0;
20155847Sbostic 		p->pend[i] = 0;
20255847Sbostic 	}
20360201Sbostic 	g->csetsize = NC;
20455847Sbostic 	g->sets = NULL;
20555847Sbostic 	g->setbits = NULL;
20655847Sbostic 	g->ncsets = 0;
20755847Sbostic 	g->cflags = cflags;
20855847Sbostic 	g->iflags = 0;
20960201Sbostic 	g->nbol = 0;
21060201Sbostic 	g->neol = 0;
21155847Sbostic 	g->must = NULL;
21255847Sbostic 	g->mlen = 0;
21355847Sbostic 	g->nsub = 0;
21455847Sbostic 	g->ncategories = 1;	/* category 0 is "everything else" */
21560610Sbostic 	g->categories = &g->catspace[-(CHAR_MIN)];
21660201Sbostic 	(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
21755847Sbostic 	g->backrefs = 0;
21855847Sbostic 
21955847Sbostic 	/* do it */
22055847Sbostic 	EMIT(OEND, 0);
22155847Sbostic 	g->firststate = THERE();
22255847Sbostic 	if (cflags&REG_EXTENDED)
22360201Sbostic 		p_ere(p, OUT);
22460201Sbostic 	else if (cflags&REG_NOSPEC)
22560201Sbostic 		p_str(p);
22655847Sbostic 	else
22760201Sbostic 		p_bre(p, OUT, OUT);
22855847Sbostic 	EMIT(OEND, 0);
22955847Sbostic 	g->laststate = THERE();
23055847Sbostic 
23155847Sbostic 	/* tidy up loose ends and fill things in */
23255847Sbostic 	categorize(p, g);
23355847Sbostic 	stripsnug(p, g);
23455847Sbostic 	findmust(p, g);
23555847Sbostic 	g->nplus = pluscount(p, g);
23655847Sbostic 	g->magic = MAGIC2;
23755847Sbostic 	preg->re_nsub = g->nsub;
23855847Sbostic 	preg->re_g = g;
23955847Sbostic 	preg->re_magic = MAGIC1;
24056355Sbostic #ifndef REDEBUG
24155847Sbostic 	/* not debugging, so can't rely on the assert() in regexec() */
24255847Sbostic 	if (g->iflags&BAD)
24355847Sbostic 		SETERROR(REG_ASSERT);
24455847Sbostic #endif
24555847Sbostic 
24655847Sbostic 	/* win or lose, we're done */
24755847Sbostic 	if (p->error != 0)	/* lose */
24855847Sbostic 		regfree(preg);
24955847Sbostic 	return(p->error);
25055847Sbostic }
25155847Sbostic 
25255847Sbostic /*
25355847Sbostic  - p_ere - ERE parser top level, concatenation and alternation
25460201Sbostic  == static void p_ere(register struct parse *p, int stop);
25555847Sbostic  */
25655847Sbostic static void
p_ere(p,stop)25755847Sbostic p_ere(p, stop)
25855847Sbostic register struct parse *p;
25960201Sbostic int stop;			/* character this ERE should end at */
26055847Sbostic {
26160201Sbostic 	register char c;
26255847Sbostic 	register sopno prevback;
26355847Sbostic 	register sopno prevfwd;
26455847Sbostic 	register sopno conc;
26555847Sbostic 	register int first = 1;		/* is this the first alternative? */
26655847Sbostic 
26755847Sbostic 	for (;;) {
26855847Sbostic 		/* do a bunch of concatenated expressions */
26955847Sbostic 		conc = HERE();
27060201Sbostic 		while (MORE() && (c = PEEK()) != '|' && c != stop)
27155847Sbostic 			p_ere_exp(p);
27255847Sbostic 		REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
27355847Sbostic 
27455847Sbostic 		if (!EAT('|'))
27555847Sbostic 			break;		/* NOTE BREAK OUT */
27655847Sbostic 
27755847Sbostic 		if (first) {
27855847Sbostic 			INSERT(OCH_, conc);	/* offset is wrong */
27955847Sbostic 			prevfwd = conc;
28055847Sbostic 			prevback = conc;
28155847Sbostic 			first = 0;
28255847Sbostic 		}
28360201Sbostic 		ASTERN(OOR1, prevback);
28455847Sbostic 		prevback = THERE();
28560201Sbostic 		AHEAD(prevfwd);			/* fix previous offset */
28655847Sbostic 		prevfwd = HERE();
28755847Sbostic 		EMIT(OOR2, 0);			/* offset is very wrong */
28855847Sbostic 	}
28955847Sbostic 
29055847Sbostic 	if (!first) {		/* tail-end fixups */
29160201Sbostic 		AHEAD(prevfwd);
29260201Sbostic 		ASTERN(O_CH, prevback);
29355847Sbostic 	}
29455847Sbostic 
29560201Sbostic 	assert(!MORE() || SEE(stop));
29655847Sbostic }
29755847Sbostic 
29855847Sbostic /*
29955847Sbostic  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
30060201Sbostic  == static void p_ere_exp(register struct parse *p);
30155847Sbostic  */
30255847Sbostic static void
p_ere_exp(p)30355847Sbostic p_ere_exp(p)
30455847Sbostic register struct parse *p;
30555847Sbostic {
30660201Sbostic 	register char c;
30755847Sbostic 	register sopno pos;
30855847Sbostic 	register int count;
30955847Sbostic 	register int count2;
31055847Sbostic 	register sopno subno;
31155847Sbostic 	int wascaret = 0;
31255847Sbostic 
31360201Sbostic 	assert(MORE());		/* caller should have ensured this */
31455847Sbostic 	c = GETNEXT();
31555847Sbostic 
31655847Sbostic 	pos = HERE();
31755847Sbostic 	switch (c) {
31855847Sbostic 	case '(':
31960201Sbostic 		REQUIRE(MORE(), REG_EPAREN);
32055847Sbostic 		p->g->nsub++;
32155847Sbostic 		subno = p->g->nsub;
32255847Sbostic 		if (subno < NPAREN)
32355847Sbostic 			p->pbegin[subno] = HERE();
32455847Sbostic 		EMIT(OLPAREN, subno);
32555847Sbostic 		if (!SEE(')'))
32655847Sbostic 			p_ere(p, ')');
32755847Sbostic 		if (subno < NPAREN) {
32855847Sbostic 			p->pend[subno] = HERE();
32955847Sbostic 			assert(p->pend[subno] != 0);
33055847Sbostic 		}
33155847Sbostic 		EMIT(ORPAREN, subno);
33255847Sbostic 		MUSTEAT(')', REG_EPAREN);
33355847Sbostic 		break;
33455847Sbostic #ifndef POSIX_MISTAKE
33555847Sbostic 	case ')':		/* happens only if no current unmatched ( */
33655847Sbostic 		/*
33755847Sbostic 		 * You may ask, why the ifndef?  Because I didn't notice
33855847Sbostic 		 * this until slightly too late for 1003.2, and none of the
33955847Sbostic 		 * other 1003.2 regular-expression reviewers noticed it at
34055847Sbostic 		 * all.  So an unmatched ) is legal POSIX, at least until
34155847Sbostic 		 * we can get it fixed.
34255847Sbostic 		 */
34355847Sbostic 		SETERROR(REG_EPAREN);
34455847Sbostic 		break;
34555847Sbostic #endif
34655847Sbostic 	case '^':
34755847Sbostic 		EMIT(OBOL, 0);
34855847Sbostic 		p->g->iflags |= USEBOL;
34960201Sbostic 		p->g->nbol++;
35055847Sbostic 		wascaret = 1;
35155847Sbostic 		break;
35255847Sbostic 	case '$':
35355847Sbostic 		EMIT(OEOL, 0);
35455847Sbostic 		p->g->iflags |= USEEOL;
35560201Sbostic 		p->g->neol++;
35655847Sbostic 		break;
35755847Sbostic 	case '|':
35855847Sbostic 		SETERROR(REG_EMPTY);
35955847Sbostic 		break;
36055847Sbostic 	case '*':
36155847Sbostic 	case '+':
36255847Sbostic 	case '?':
36355847Sbostic 		SETERROR(REG_BADRPT);
36455847Sbostic 		break;
36555847Sbostic 	case '.':
36655847Sbostic 		if (p->g->cflags&REG_NEWLINE)
36755847Sbostic 			nonnewline(p);
36855847Sbostic 		else
36955847Sbostic 			EMIT(OANY, 0);
37055847Sbostic 		break;
37155847Sbostic 	case '[':
37255847Sbostic 		p_bracket(p);
37355847Sbostic 		break;
37455847Sbostic 	case '\\':
37560201Sbostic 		REQUIRE(MORE(), REG_EESCAPE);
37655847Sbostic 		c = GETNEXT();
37760201Sbostic 		ordinary(p, c);
37855847Sbostic 		break;
37955847Sbostic 	case '{':		/* okay as ordinary except if digit follows */
38060201Sbostic 		REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
38155847Sbostic 		/* FALLTHROUGH */
38255847Sbostic 	default:
38355847Sbostic 		ordinary(p, c);
38455847Sbostic 		break;
38555847Sbostic 	}
38655847Sbostic 
38760201Sbostic 	if (!MORE())
38860201Sbostic 		return;
38955847Sbostic 	c = PEEK();
39060201Sbostic 	/* we call { a repetition if followed by a digit */
39160201Sbostic 	if (!( c == '*' || c == '+' || c == '?' ||
39260201Sbostic 				(c == '{' && MORE2() && isdigit(PEEK2())) ))
39355847Sbostic 		return;		/* no repetition, we're done */
39455847Sbostic 	NEXT();
39555847Sbostic 
39655847Sbostic 	REQUIRE(!wascaret, REG_BADRPT);
39755847Sbostic 	switch (c) {
39855847Sbostic 	case '*':	/* implemented as +? */
39966381Sbostic 		/* this case does not require the (y|) trick, noKLUDGE */
40055847Sbostic 		INSERT(OPLUS_, pos);
40160201Sbostic 		ASTERN(O_PLUS, pos);
40255847Sbostic 		INSERT(OQUEST_, pos);
40360201Sbostic 		ASTERN(O_QUEST, pos);
40455847Sbostic 		break;
40555847Sbostic 	case '+':
40655847Sbostic 		INSERT(OPLUS_, pos);
40760201Sbostic 		ASTERN(O_PLUS, pos);
40855847Sbostic 		break;
40955847Sbostic 	case '?':
41066381Sbostic 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
41166381Sbostic 		INSERT(OCH_, pos);		/* offset slightly wrong */
41266381Sbostic 		ASTERN(OOR1, pos);		/* this one's right */
41366381Sbostic 		AHEAD(pos);			/* fix the OCH_ */
41466381Sbostic 		EMIT(OOR2, 0);			/* offset very wrong... */
41566381Sbostic 		AHEAD(THERE());			/* ...so fix it */
41666381Sbostic 		ASTERN(O_CH, THERETHERE());
41755847Sbostic 		break;
41855847Sbostic 	case '{':
41955847Sbostic 		count = p_count(p);
42055847Sbostic 		if (EAT(',')) {
42155847Sbostic 			if (isdigit(PEEK())) {
42255847Sbostic 				count2 = p_count(p);
42355847Sbostic 				REQUIRE(count <= count2, REG_BADBR);
42455847Sbostic 			} else		/* single number with comma */
42555847Sbostic 				count2 = INFINITY;
42655847Sbostic 		} else		/* just a single number */
42755847Sbostic 			count2 = count;
42855847Sbostic 		repeat(p, pos, count, count2);
42955847Sbostic 		if (!EAT('}')) {	/* error heuristics */
43060201Sbostic 			while (MORE() && PEEK() != '}')
43155847Sbostic 				NEXT();
43260201Sbostic 			REQUIRE(MORE(), REG_EBRACE);
43360201Sbostic 			SETERROR(REG_BADBR);
43455847Sbostic 		}
43555847Sbostic 		break;
43655847Sbostic 	}
43755847Sbostic 
43860201Sbostic 	if (!MORE())
43960201Sbostic 		return;
44055847Sbostic 	c = PEEK();
44160201Sbostic 	if (!( c == '*' || c == '+' || c == '?' ||
44260201Sbostic 				(c == '{' && MORE2() && isdigit(PEEK2())) ) )
44360201Sbostic 		return;
44460201Sbostic 	SETERROR(REG_BADRPT);
44555847Sbostic }
44655847Sbostic 
44755847Sbostic /*
44860201Sbostic  - p_str - string (no metacharacters) "parser"
44960201Sbostic  == static void p_str(register struct parse *p);
45060201Sbostic  */
45160201Sbostic static void
p_str(p)45260201Sbostic p_str(p)
45360201Sbostic register struct parse *p;
45460201Sbostic {
45560201Sbostic 	REQUIRE(MORE(), REG_EMPTY);
45660201Sbostic 	while (MORE())
45760201Sbostic 		ordinary(p, GETNEXT());
45860201Sbostic }
45960201Sbostic 
46060201Sbostic /*
46155847Sbostic  - p_bre - BRE parser top level, anchoring and concatenation
46260201Sbostic  == static void p_bre(register struct parse *p, register int end1, \
46360201Sbostic  ==	register int end2);
46460201Sbostic  * Giving end1 as OUT essentially eliminates the end1/end2 check.
46555847Sbostic  *
46656355Sbostic  * This implementation is a bit of a kludge, in that a trailing $ is first
46756355Sbostic  * taken as an ordinary character and then revised to be an anchor.  The
46856355Sbostic  * only undesirable side effect is that '$' gets included as a character
46956355Sbostic  * category in such cases.  This is fairly harmless; not worth fixing.
47060201Sbostic  * The amount of lookahead needed to avoid this kludge is excessive.
47155847Sbostic  */
47255847Sbostic static void
p_bre(p,end1,end2)47355847Sbostic p_bre(p, end1, end2)
47455847Sbostic register struct parse *p;
47560201Sbostic register int end1;		/* first terminating character */
47660201Sbostic register int end2;		/* second terminating character */
47755847Sbostic {
47855847Sbostic 	register sopno start = HERE();
47955847Sbostic 	register int first = 1;			/* first subexpression? */
48056355Sbostic 	register int wasdollar = 0;
48155847Sbostic 
48255847Sbostic 	if (EAT('^')) {
48355847Sbostic 		EMIT(OBOL, 0);
48455847Sbostic 		p->g->iflags |= USEBOL;
48560201Sbostic 		p->g->nbol++;
48655847Sbostic 	}
48760201Sbostic 	while (MORE() && !SEETWO(end1, end2)) {
48855847Sbostic 		wasdollar = p_simp_re(p, first);
48955847Sbostic 		first = 0;
49055847Sbostic 	}
49155847Sbostic 	if (wasdollar) {	/* oops, that was a trailing anchor */
49255847Sbostic 		DROP(1);
49355847Sbostic 		EMIT(OEOL, 0);
49455847Sbostic 		p->g->iflags |= USEEOL;
49560201Sbostic 		p->g->neol++;
49655847Sbostic 	}
49755847Sbostic 
49855847Sbostic 	REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
49955847Sbostic }
50055847Sbostic 
50155847Sbostic /*
50255847Sbostic  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
50360201Sbostic  == static int p_simp_re(register struct parse *p, int starordinary);
50455847Sbostic  */
50555847Sbostic static int			/* was the simple RE an unbackslashed $? */
p_simp_re(p,starordinary)50655847Sbostic p_simp_re(p, starordinary)
50755847Sbostic register struct parse *p;
50855847Sbostic int starordinary;		/* is a leading * an ordinary character? */
50955847Sbostic {
51055847Sbostic 	register int c;
51155847Sbostic 	register int count;
51255847Sbostic 	register int count2;
51355847Sbostic 	register sopno pos;
51455847Sbostic 	register int i;
51555847Sbostic 	register sopno subno;
51655847Sbostic #	define	BACKSL	(1<<CHAR_BIT)
51755847Sbostic 
51855847Sbostic 	pos = HERE();		/* repetion op, if any, covers from here */
51955847Sbostic 
52060201Sbostic 	assert(MORE());		/* caller should have ensured this */
52155847Sbostic 	c = GETNEXT();
52260201Sbostic 	if (c == '\\') {
52360201Sbostic 		REQUIRE(MORE(), REG_EESCAPE);
52460201Sbostic 		c = BACKSL | (unsigned char)GETNEXT();
52560201Sbostic 	}
52655847Sbostic 	switch (c) {
52755847Sbostic 	case '.':
52855847Sbostic 		if (p->g->cflags&REG_NEWLINE)
52955847Sbostic 			nonnewline(p);
53055847Sbostic 		else
53155847Sbostic 			EMIT(OANY, 0);
53255847Sbostic 		break;
53355847Sbostic 	case '[':
53455847Sbostic 		p_bracket(p);
53555847Sbostic 		break;
53655847Sbostic 	case BACKSL|'{':
53755847Sbostic 		SETERROR(REG_BADRPT);
53855847Sbostic 		break;
53955847Sbostic 	case BACKSL|'(':
54055847Sbostic 		p->g->nsub++;
54155847Sbostic 		subno = p->g->nsub;
54255847Sbostic 		if (subno < NPAREN)
54355847Sbostic 			p->pbegin[subno] = HERE();
54455847Sbostic 		EMIT(OLPAREN, subno);
54560201Sbostic 		/* the MORE here is an error heuristic */
54660201Sbostic 		if (MORE() && !SEETWO('\\', ')'))
54755847Sbostic 			p_bre(p, '\\', ')');
54855847Sbostic 		if (subno < NPAREN) {
54955847Sbostic 			p->pend[subno] = HERE();
55055847Sbostic 			assert(p->pend[subno] != 0);
55155847Sbostic 		}
55255847Sbostic 		EMIT(ORPAREN, subno);
55355847Sbostic 		REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
55455847Sbostic 		break;
55555847Sbostic 	case BACKSL|')':	/* should not get here -- must be user */
55655847Sbostic 	case BACKSL|'}':
55755847Sbostic 		SETERROR(REG_EPAREN);
55855847Sbostic 		break;
55955847Sbostic 	case BACKSL|'1':
56055847Sbostic 	case BACKSL|'2':
56155847Sbostic 	case BACKSL|'3':
56255847Sbostic 	case BACKSL|'4':
56355847Sbostic 	case BACKSL|'5':
56455847Sbostic 	case BACKSL|'6':
56555847Sbostic 	case BACKSL|'7':
56655847Sbostic 	case BACKSL|'8':
56755847Sbostic 	case BACKSL|'9':
56855847Sbostic 		i = (c&~BACKSL) - '0';
56955847Sbostic 		assert(i < NPAREN);
57055847Sbostic 		if (p->pend[i] != 0) {
57155847Sbostic 			assert(i <= p->g->nsub);
57255847Sbostic 			EMIT(OBACK_, i);
57355847Sbostic 			assert(p->pbegin[i] != 0);
57455847Sbostic 			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
57555847Sbostic 			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
57655847Sbostic 			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
57755847Sbostic 			EMIT(O_BACK, i);
57855847Sbostic 		} else
57955847Sbostic 			SETERROR(REG_ESUBREG);
58055847Sbostic 		p->g->backrefs = 1;
58155847Sbostic 		break;
58255847Sbostic 	case '*':
58355847Sbostic 		REQUIRE(starordinary, REG_BADRPT);
58455847Sbostic 		/* FALLTHROUGH */
58555847Sbostic 	default:
58660201Sbostic 		ordinary(p, c &~ BACKSL);
58755847Sbostic 		break;
58855847Sbostic 	}
58955847Sbostic 
59055847Sbostic 	if (EAT('*')) {		/* implemented as +? */
59166381Sbostic 		/* this case does not require the (y|) trick, noKLUDGE */
59255847Sbostic 		INSERT(OPLUS_, pos);
59360201Sbostic 		ASTERN(O_PLUS, pos);
59455847Sbostic 		INSERT(OQUEST_, pos);
59560201Sbostic 		ASTERN(O_QUEST, pos);
59655847Sbostic 	} else if (EATTWO('\\', '{')) {
59755847Sbostic 		count = p_count(p);
59855847Sbostic 		if (EAT(',')) {
59960201Sbostic 			if (MORE() && isdigit(PEEK())) {
60055847Sbostic 				count2 = p_count(p);
60155847Sbostic 				REQUIRE(count <= count2, REG_BADBR);
60255847Sbostic 			} else		/* single number with comma */
60355847Sbostic 				count2 = INFINITY;
60455847Sbostic 		} else		/* just a single number */
60555847Sbostic 			count2 = count;
60655847Sbostic 		repeat(p, pos, count, count2);
60755847Sbostic 		if (!EATTWO('\\', '}')) {	/* error heuristics */
60860201Sbostic 			while (MORE() && !SEETWO('\\', '}'))
60955847Sbostic 				NEXT();
61060201Sbostic 			REQUIRE(MORE(), REG_EBRACE);
61160201Sbostic 			SETERROR(REG_BADBR);
61255847Sbostic 		}
61360201Sbostic 	} else if (c == (unsigned char)'$')	/* $ (but not \$) ends it */
61455847Sbostic 		return(1);
61555847Sbostic 
61655847Sbostic 	return(0);
61755847Sbostic }
61855847Sbostic 
61955847Sbostic /*
62055847Sbostic  - p_count - parse a repetition count
62160201Sbostic  == static int p_count(register struct parse *p);
62255847Sbostic  */
62355847Sbostic static int			/* the value */
p_count(p)62455847Sbostic p_count(p)
62555847Sbostic register struct parse *p;
62655847Sbostic {
62755847Sbostic 	register int count = 0;
62855847Sbostic 	register int ndigits = 0;
62955847Sbostic 
63060201Sbostic 	while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
63155847Sbostic 		count = count*10 + (GETNEXT() - '0');
63255847Sbostic 		ndigits++;
63355847Sbostic 	}
63455847Sbostic 
63555847Sbostic 	REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
63655847Sbostic 	return(count);
63755847Sbostic }
63855847Sbostic 
63955847Sbostic /*
64055847Sbostic  - p_bracket - parse a bracketed character list
64160201Sbostic  == static void p_bracket(register struct parse *p);
64255847Sbostic  *
64355847Sbostic  * Note a significant property of this code:  if the allocset() did SETERROR,
64455847Sbostic  * no set operations are done.
64555847Sbostic  */
64655847Sbostic static void
p_bracket(p)64755847Sbostic p_bracket(p)
64855847Sbostic register struct parse *p;
64955847Sbostic {
65060201Sbostic 	register char c;
65155847Sbostic 	register cset *cs = allocset(p);
65255847Sbostic 	register int invert = 0;
65355847Sbostic 
65460201Sbostic 	/* Dept of Truly Sickening Special-Case Kludges */
65560201Sbostic 	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
65660201Sbostic 		EMIT(OBOW, 0);
65760201Sbostic 		NEXTn(6);
65860201Sbostic 		return;
65960201Sbostic 	}
66060201Sbostic 	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
66160201Sbostic 		EMIT(OEOW, 0);
66260201Sbostic 		NEXTn(6);
66360201Sbostic 		return;
66460201Sbostic 	}
66560201Sbostic 
66655847Sbostic 	if (EAT('^'))
66755847Sbostic 		invert++;	/* make note to invert set at end */
66855847Sbostic 	if (EAT(']'))
66955847Sbostic 		CHadd(cs, ']');
67056618Sbostic 	else if (EAT('-'))
67156618Sbostic 		CHadd(cs, '-');
67260201Sbostic 	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
67355847Sbostic 		p_b_term(p, cs);
67455847Sbostic 	if (EAT('-'))
67555847Sbostic 		CHadd(cs, '-');
67655847Sbostic 	MUSTEAT(']', REG_EBRACK);
67755847Sbostic 
67860201Sbostic 	if (p->error != 0)	/* don't mess things up further */
67960201Sbostic 		return;
68060201Sbostic 
68160201Sbostic 	if (p->g->cflags&REG_ICASE) {
68255847Sbostic 		register int i;
68360201Sbostic 		register int ci;
68455847Sbostic 
68555847Sbostic 		for (i = p->g->csetsize - 1; i >= 0; i--)
68660201Sbostic 			if (CHIN(cs, i) && isalpha(i)) {
68760201Sbostic 				ci = othercase(i);
68860201Sbostic 				if (ci != i)
68960201Sbostic 					CHadd(cs, ci);
69060201Sbostic 			}
69160201Sbostic 		if (cs->multis != NULL)
69260201Sbostic 			mccase(p, cs);
69360201Sbostic 	}
69460201Sbostic 	if (invert) {
69560201Sbostic 		register int i;
69660201Sbostic 
69760201Sbostic 		for (i = p->g->csetsize - 1; i >= 0; i--)
69855847Sbostic 			if (CHIN(cs, i))
69955847Sbostic 				CHsub(cs, i);
70055847Sbostic 			else
70155847Sbostic 				CHadd(cs, i);
70255847Sbostic 		if (p->g->cflags&REG_NEWLINE)
70355847Sbostic 			CHsub(cs, '\n');
70455847Sbostic 		if (cs->multis != NULL)
70555847Sbostic 			mcinvert(p, cs);
70655847Sbostic 	}
70760201Sbostic 
70855847Sbostic 	assert(cs->multis == NULL);		/* xxx */
70960201Sbostic 
71060201Sbostic 	if (nch(p, cs) == 1) {		/* optimize singleton sets */
71160201Sbostic 		ordinary(p, firstch(p, cs));
71260201Sbostic 		freeset(p, cs);
71360201Sbostic 	} else
71460201Sbostic 		EMIT(OANYOF, freezeset(p, cs));
71555847Sbostic }
71655847Sbostic 
71755847Sbostic /*
71855847Sbostic  - p_b_term - parse one term of a bracketed character list
71960201Sbostic  == static void p_b_term(register struct parse *p, register cset *cs);
72055847Sbostic  */
72155847Sbostic static void
p_b_term(p,cs)72255847Sbostic p_b_term(p, cs)
72355847Sbostic register struct parse *p;
72455847Sbostic register cset *cs;
72555847Sbostic {
72660201Sbostic 	register char c;
72760201Sbostic 	register char start, finish;
72855847Sbostic 	register int i;
72955847Sbostic 
73055847Sbostic 	/* classify what we've got */
73160201Sbostic 	switch ((MORE()) ? PEEK() : '\0') {
73255847Sbostic 	case '[':
73360201Sbostic 		c = (MORE2()) ? PEEK2() : '\0';
73455847Sbostic 		break;
73555847Sbostic 	case '-':
73655847Sbostic 		SETERROR(REG_ERANGE);
73755847Sbostic 		return;			/* NOTE RETURN */
73855847Sbostic 		break;
73955847Sbostic 	default:
74055847Sbostic 		c = '\0';
74155847Sbostic 		break;
74255847Sbostic 	}
74355847Sbostic 
74455847Sbostic 	switch (c) {
74555847Sbostic 	case ':':		/* character class */
74655847Sbostic 		NEXT2();
74760201Sbostic 		REQUIRE(MORE(), REG_EBRACK);
74855847Sbostic 		c = PEEK();
74955847Sbostic 		REQUIRE(c != '-' && c != ']', REG_ECTYPE);
75055847Sbostic 		p_b_cclass(p, cs);
75160201Sbostic 		REQUIRE(MORE(), REG_EBRACK);
75255847Sbostic 		REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
75355847Sbostic 		break;
75455847Sbostic 	case '=':		/* equivalence class */
75555847Sbostic 		NEXT2();
75660201Sbostic 		REQUIRE(MORE(), REG_EBRACK);
75755847Sbostic 		c = PEEK();
75855847Sbostic 		REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
75955847Sbostic 		p_b_eclass(p, cs);
76060201Sbostic 		REQUIRE(MORE(), REG_EBRACK);
76155847Sbostic 		REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
76255847Sbostic 		break;
76355847Sbostic 	default:		/* symbol, ordinary character, or range */
76455847Sbostic /* xxx revision needed for multichar stuff */
76555847Sbostic 		start = p_b_symbol(p);
76660201Sbostic 		if (SEE('-') && MORE2() && PEEK2() != ']') {
76755847Sbostic 			/* range */
76855847Sbostic 			NEXT();
76955847Sbostic 			if (EAT('-'))
77055847Sbostic 				finish = '-';
77155847Sbostic 			else
77255847Sbostic 				finish = p_b_symbol(p);
77355847Sbostic 		} else
77455847Sbostic 			finish = start;
77560201Sbostic /* xxx what about signed chars here... */
77655847Sbostic 		REQUIRE(start <= finish, REG_ERANGE);
77760201Sbostic 		for (i = start; i <= finish; i++)
77855847Sbostic 			CHadd(cs, i);
77955847Sbostic 		break;
78055847Sbostic 	}
78155847Sbostic }
78255847Sbostic 
78355847Sbostic /*
78455847Sbostic  - p_b_cclass - parse a character-class name and deal with it
78560201Sbostic  == static void p_b_cclass(register struct parse *p, register cset *cs);
78655847Sbostic  */
78755847Sbostic static void
p_b_cclass(p,cs)78855847Sbostic p_b_cclass(p, cs)
78955847Sbostic register struct parse *p;
79055847Sbostic register cset *cs;
79155847Sbostic {
79260201Sbostic 	register char *sp = p->next;
79355847Sbostic 	register struct cclass *cp;
79460201Sbostic 	register size_t len;
79560201Sbostic 	register char *u;
79660201Sbostic 	register char c;
79755847Sbostic 
79860201Sbostic 	while (MORE() && isalpha(PEEK()))
79960201Sbostic 		NEXT();
80060201Sbostic 	len = p->next - sp;
80155847Sbostic 	for (cp = cclasses; cp->name != NULL; cp++)
80260201Sbostic 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
80355847Sbostic 			break;
80455847Sbostic 	if (cp->name == NULL) {
80555847Sbostic 		/* oops, didn't find it */
80655847Sbostic 		SETERROR(REG_ECTYPE);
80755847Sbostic 		return;
80855847Sbostic 	}
80955847Sbostic 
81060201Sbostic 	u = cp->chars;
81155847Sbostic 	while ((c = *u++) != '\0')
81255847Sbostic 		CHadd(cs, c);
81360201Sbostic 	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
81466362Sbostic 		MCadd(p, cs, u);
81555847Sbostic }
81655847Sbostic 
81755847Sbostic /*
81855847Sbostic  - p_b_eclass - parse an equivalence-class name and deal with it
81960201Sbostic  == static void p_b_eclass(register struct parse *p, register cset *cs);
82055847Sbostic  *
82155847Sbostic  * This implementation is incomplete. xxx
82255847Sbostic  */
82355847Sbostic static void
p_b_eclass(p,cs)82455847Sbostic p_b_eclass(p, cs)
82555847Sbostic register struct parse *p;
82655847Sbostic register cset *cs;
82755847Sbostic {
82860201Sbostic 	register char c;
82955847Sbostic 
83055847Sbostic 	c = p_b_coll_elem(p, '=');
83155847Sbostic 	CHadd(cs, c);
83255847Sbostic }
83355847Sbostic 
83455847Sbostic /*
83555847Sbostic  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
83660201Sbostic  == static char p_b_symbol(register struct parse *p);
83755847Sbostic  */
83860201Sbostic static char			/* value of symbol */
p_b_symbol(p)83955847Sbostic p_b_symbol(p)
84055847Sbostic register struct parse *p;
84155847Sbostic {
84260201Sbostic 	register char value;
84355847Sbostic 
84460201Sbostic 	REQUIRE(MORE(), REG_EBRACK);
84560201Sbostic 	if (!EATTWO('[', '.'))
84655847Sbostic 		return(GETNEXT());
84755847Sbostic 
84855847Sbostic 	/* collating symbol */
84955847Sbostic 	value = p_b_coll_elem(p, '.');
85055847Sbostic 	REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
85155847Sbostic 	return(value);
85255847Sbostic }
85355847Sbostic 
85455847Sbostic /*
85555847Sbostic  - p_b_coll_elem - parse a collating-element name and look it up
85660201Sbostic  == static char p_b_coll_elem(register struct parse *p, int endc);
85755847Sbostic  */
85860201Sbostic static char			/* value of collating element */
p_b_coll_elem(p,endc)85955847Sbostic p_b_coll_elem(p, endc)
86055847Sbostic register struct parse *p;
86160201Sbostic int endc;			/* name ended by endc,']' */
86255847Sbostic {
86360201Sbostic 	register char *sp = p->next;
86455847Sbostic 	register struct cname *cp;
86555847Sbostic 	register int len;
86660201Sbostic 	register char c;
86755847Sbostic 
86860201Sbostic 	while (MORE() && !SEETWO(endc, ']'))
86955847Sbostic 		NEXT();
87060201Sbostic 	if (!MORE()) {
87155847Sbostic 		SETERROR(REG_EBRACK);
87255847Sbostic 		return(0);
87355847Sbostic 	}
87455847Sbostic 	len = p->next - sp;
87555847Sbostic 	for (cp = cnames; cp->name != NULL; cp++)
87660201Sbostic 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
87755847Sbostic 			return(cp->code);	/* known name */
87855847Sbostic 	if (len == 1)
87955847Sbostic 		return(*sp);	/* single character */
88055847Sbostic 	SETERROR(REG_ECOLLATE);			/* neither */
88155847Sbostic 	return(0);
88255847Sbostic }
88355847Sbostic 
/*
 - othercase - return the case counterpart of an alphabetic
 == static char othercase(int ch);
 *
 * ch must be alphabetic (checked by assert).  It may arrive as a
 * sign-extended plain char, so it is first reduced to its unsigned char
 * value: the <ctype.h> functions are undefined for other negative
 * arguments.  The result is converted back to char on return.
 */
static char			/* if no counterpart, return ch */
othercase(ch)
int ch;
{
	ch = (unsigned char)ch;
	assert(isalpha(ch));
	if (isupper(ch))
		return((char)tolower(ch));
	else if (islower(ch))
		return((char)toupper(ch));
	else			/* peculiar, but could happen */
		return((char)ch);
}
90055847Sbostic 
90155847Sbostic /*
90260201Sbostic  - bothcases - emit a dualcase version of a two-case character
90360201Sbostic  == static void bothcases(register struct parse *p, int ch);
90456355Sbostic  *
90555847Sbostic  * Boy, is this implementation ever a kludge...
90655847Sbostic  */
90755847Sbostic static void
bothcases(p,ch)90855847Sbostic bothcases(p, ch)
90955847Sbostic register struct parse *p;
91060201Sbostic int ch;
91155847Sbostic {
91260201Sbostic 	register char *oldnext = p->next;
91360201Sbostic 	register char *oldend = p->end;
91460201Sbostic 	char bracket[3];
91555847Sbostic 
91660201Sbostic 	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
91755847Sbostic 	p->next = bracket;
91860201Sbostic 	p->end = bracket+2;
91955847Sbostic 	bracket[0] = ch;
92055847Sbostic 	bracket[1] = ']';
92155847Sbostic 	bracket[2] = '\0';
92255847Sbostic 	p_bracket(p);
92355847Sbostic 	assert(p->next == bracket+2);
92455847Sbostic 	p->next = oldnext;
92560201Sbostic 	p->end = oldend;
92655847Sbostic }
92755847Sbostic 
92855847Sbostic /*
92955847Sbostic  - ordinary - emit an ordinary character
93060201Sbostic  == static void ordinary(register struct parse *p, register int ch);
93155847Sbostic  */
93255847Sbostic static void
ordinary(p,ch)93355847Sbostic ordinary(p, ch)
93455847Sbostic register struct parse *p;
93560201Sbostic register int ch;
93655847Sbostic {
93760201Sbostic 	register cat_t *cap = p->g->categories;
93855847Sbostic 
93960201Sbostic 	if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
94055847Sbostic 		bothcases(p, ch);
94160201Sbostic 	else {
94260201Sbostic 		EMIT(OCHAR, (unsigned char)ch);
94360201Sbostic 		if (cap[ch] == 0)
94460201Sbostic 			cap[ch] = p->g->ncategories++;
94555847Sbostic 	}
94655847Sbostic }
94755847Sbostic 
94855847Sbostic /*
94955847Sbostic  - nonnewline - emit REG_NEWLINE version of OANY
95060201Sbostic  == static void nonnewline(register struct parse *p);
95156355Sbostic  *
95255847Sbostic  * Boy, is this implementation ever a kludge...
95355847Sbostic  */
95455847Sbostic static void
nonnewline(p)95555847Sbostic nonnewline(p)
95655847Sbostic register struct parse *p;
95755847Sbostic {
95860201Sbostic 	register char *oldnext = p->next;
95960201Sbostic 	register char *oldend = p->end;
96060201Sbostic 	char bracket[4];
96155847Sbostic 
96255847Sbostic 	p->next = bracket;
96360201Sbostic 	p->end = bracket+3;
96455847Sbostic 	bracket[0] = '^';
96555847Sbostic 	bracket[1] = '\n';
96655847Sbostic 	bracket[2] = ']';
96755847Sbostic 	bracket[3] = '\0';
96855847Sbostic 	p_bracket(p);
96955847Sbostic 	assert(p->next == bracket+3);
97055847Sbostic 	p->next = oldnext;
97160201Sbostic 	p->end = oldend;
97255847Sbostic }
97355847Sbostic 
97455847Sbostic /*
97555847Sbostic  - repeat - generate code for a bounded repetition, recursively if needed
97660201Sbostic  == static void repeat(register struct parse *p, sopno start, int from, int to);
97755847Sbostic  */
97855847Sbostic static void
repeat(p,start,from,to)97955847Sbostic repeat(p, start, from, to)
98055847Sbostic register struct parse *p;
98155847Sbostic sopno start;			/* operand from here to end of strip */
98255847Sbostic int from;			/* repeated from this number */
98355847Sbostic int to;				/* to this number of times (maybe INFINITY) */
98455847Sbostic {
98555847Sbostic 	register sopno finish = HERE();
98655847Sbostic #	define	N	2
98755847Sbostic #	define	INF	3
98855847Sbostic #	define	REP(f, t)	((f)*8 + (t))
98955847Sbostic #	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
99055847Sbostic 	register sopno copy;
99155847Sbostic 
99255847Sbostic 	if (p->error != 0)	/* head off possible runaway recursion */
99355847Sbostic 		return;
99455847Sbostic 
99555847Sbostic 	assert(from <= to);
99655847Sbostic 
99755847Sbostic 	switch (REP(MAP(from), MAP(to))) {
99855847Sbostic 	case REP(0, 0):			/* must be user doing this */
99955847Sbostic 		DROP(finish-start);	/* drop the operand */
100055847Sbostic 		break;
100155847Sbostic 	case REP(0, 1):			/* as x{1,1}? */
100255847Sbostic 	case REP(0, N):			/* as x{1,n}? */
100355847Sbostic 	case REP(0, INF):		/* as x{1,}? */
100466381Sbostic 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
100566381Sbostic 		INSERT(OCH_, start);		/* offset is wrong... */
100655847Sbostic 		repeat(p, start+1, 1, to);
100766381Sbostic 		ASTERN(OOR1, start);
100860201Sbostic 		AHEAD(start);			/* ... fix it */
100966381Sbostic 		EMIT(OOR2, 0);
101066381Sbostic 		AHEAD(THERE());
101166381Sbostic 		ASTERN(O_CH, THERETHERE());
101255847Sbostic 		break;
101355847Sbostic 	case REP(1, 1):			/* trivial case */
101455847Sbostic 		/* done */
101555847Sbostic 		break;
101655847Sbostic 	case REP(1, N):			/* as x?x{1,n-1} */
101766381Sbostic 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
101866381Sbostic 		INSERT(OCH_, start);
101966381Sbostic 		ASTERN(OOR1, start);
102066381Sbostic 		AHEAD(start);
102166381Sbostic 		EMIT(OOR2, 0);			/* offset very wrong... */
102266381Sbostic 		AHEAD(THERE());			/* ...so fix it */
102366381Sbostic 		ASTERN(O_CH, THERETHERE());
102455847Sbostic 		copy = dupl(p, start+1, finish+1);
102566381Sbostic 		assert(copy == finish+4);
102655847Sbostic 		repeat(p, copy, 1, to-1);
102755847Sbostic 		break;
102855847Sbostic 	case REP(1, INF):		/* as x+ */
102955847Sbostic 		INSERT(OPLUS_, start);
103060201Sbostic 		ASTERN(O_PLUS, start);
103155847Sbostic 		break;
103255847Sbostic 	case REP(N, N):			/* as xx{m-1,n-1} */
103355847Sbostic 		copy = dupl(p, start, finish);
103455847Sbostic 		repeat(p, copy, from-1, to-1);
103555847Sbostic 		break;
103655847Sbostic 	case REP(N, INF):		/* as xx{n-1,INF} */
103755847Sbostic 		copy = dupl(p, start, finish);
103855847Sbostic 		repeat(p, copy, from-1, to);
103955847Sbostic 		break;
104055847Sbostic 	default:			/* "can't happen" */
104155847Sbostic 		SETERROR(REG_ASSERT);	/* just in case */
104255847Sbostic 		break;
104355847Sbostic 	}
104455847Sbostic }
104555847Sbostic 
104655847Sbostic /*
104755847Sbostic  - seterr - set an error condition
104860201Sbostic  == static int seterr(register struct parse *p, int e);
104955847Sbostic  */
105055847Sbostic static int			/* useless but makes type checking happy */
seterr(p,e)105155847Sbostic seterr(p, e)
105255847Sbostic register struct parse *p;
105355847Sbostic int e;
105455847Sbostic {
105555847Sbostic 	if (p->error == 0)	/* keep earliest error condition */
105655847Sbostic 		p->error = e;
105755847Sbostic 	p->next = nuls;		/* try to bring things to a halt */
105860201Sbostic 	p->end = nuls;
105955847Sbostic 	return(0);		/* make the return value well-defined */
106055847Sbostic }
106155847Sbostic 
106255847Sbostic /*
106355847Sbostic  - allocset - allocate a set of characters for []
106460201Sbostic  == static cset *allocset(register struct parse *p);
106555847Sbostic  */
106655847Sbostic static cset *
allocset(p)106755847Sbostic allocset(p)
106855847Sbostic register struct parse *p;
106955847Sbostic {
107055847Sbostic 	register int no = p->g->ncsets++;
107155847Sbostic 	register size_t nc;
107255847Sbostic 	register size_t nbytes;
107355847Sbostic 	register cset *cs;
107455847Sbostic 	register size_t css = (size_t)p->g->csetsize;
107566362Sbostic 	register int i;
107655847Sbostic 
107755847Sbostic 	if (no >= p->ncsalloc) {	/* need another column of space */
107855847Sbostic 		p->ncsalloc += CHAR_BIT;
107955847Sbostic 		nc = p->ncsalloc;
108055847Sbostic 		assert(nc % CHAR_BIT == 0);
108155847Sbostic 		nbytes = nc / CHAR_BIT * css;
108255847Sbostic 		if (p->g->sets == NULL)
108355847Sbostic 			p->g->sets = (cset *)malloc(nc * sizeof(cset));
108455847Sbostic 		else
108555847Sbostic 			p->g->sets = (cset *)realloc((char *)p->g->sets,
108655847Sbostic 							nc * sizeof(cset));
108755847Sbostic 		if (p->g->setbits == NULL)
108866362Sbostic 			p->g->setbits = (uch *)malloc(nbytes);
108966362Sbostic 		else {
109066362Sbostic 			p->g->setbits = (uch *)realloc((char *)p->g->setbits,
109155847Sbostic 								nbytes);
109266362Sbostic 			/* xxx this isn't right if setbits is now NULL */
109366362Sbostic 			for (i = 0; i < no; i++)
109466362Sbostic 				p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
109566362Sbostic 		}
109655847Sbostic 		if (p->g->sets != NULL && p->g->setbits != NULL)
109755847Sbostic 			(void) memset((char *)p->g->setbits + (nbytes - css),
109855847Sbostic 								0, css);
109955847Sbostic 		else {
110055847Sbostic 			no = 0;
110155847Sbostic 			SETERROR(REG_ESPACE);
110255847Sbostic 			/* caller's responsibility not to do set ops */
110355847Sbostic 		}
110455847Sbostic 	}
110555847Sbostic 
110655847Sbostic 	assert(p->g->sets != NULL);	/* xxx */
110755847Sbostic 	cs = &p->g->sets[no];
110855847Sbostic 	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
110955847Sbostic 	cs->mask = 1 << ((no) % CHAR_BIT);
111055847Sbostic 	cs->hash = 0;
111155847Sbostic 	cs->smultis = 0;
111255847Sbostic 	cs->multis = NULL;
111355847Sbostic 
111455847Sbostic 	return(cs);
111555847Sbostic }
111655847Sbostic 
111755847Sbostic /*
111860201Sbostic  - freeset - free a now-unused set
111960201Sbostic  == static void freeset(register struct parse *p, register cset *cs);
112060201Sbostic  */
112160201Sbostic static void
freeset(p,cs)112260201Sbostic freeset(p, cs)
112360201Sbostic register struct parse *p;
112460201Sbostic register cset *cs;
112560201Sbostic {
112660201Sbostic 	register int i;
112760201Sbostic 	register cset *top = &p->g->sets[p->g->ncsets];
112860201Sbostic 	register size_t css = (size_t)p->g->csetsize;
112960201Sbostic 
113060201Sbostic 	for (i = 0; i < css; i++)
113160201Sbostic 		CHsub(cs, i);
113260201Sbostic 	if (cs == top-1)	/* recover only the easy case */
113360201Sbostic 		p->g->ncsets--;
113460201Sbostic }
113560201Sbostic 
113660201Sbostic /*
113755847Sbostic  - freezeset - final processing on a set of characters
113860201Sbostic  == static int freezeset(register struct parse *p, register cset *cs);
113955847Sbostic  *
114055847Sbostic  * The main task here is merging identical sets.  This is usually a waste
114155847Sbostic  * of time (although the hash code minimizes the overhead), but can win
114255847Sbostic  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
114355847Sbostic  * is done using addition rather than xor -- all ASCII [aA] sets xor to
114455847Sbostic  * the same value!
114555847Sbostic  */
114655847Sbostic static int			/* set number */
freezeset(p,cs)114755847Sbostic freezeset(p, cs)
114855847Sbostic register struct parse *p;
114955847Sbostic register cset *cs;
115055847Sbostic {
115166362Sbostic 	register uch h = cs->hash;
115255847Sbostic 	register int i;
115355847Sbostic 	register cset *top = &p->g->sets[p->g->ncsets];
115455847Sbostic 	register cset *cs2;
115555847Sbostic 	register size_t css = (size_t)p->g->csetsize;
115655847Sbostic 
115755847Sbostic 	/* look for an earlier one which is the same */
115855847Sbostic 	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
115955847Sbostic 		if (cs2->hash == h && cs2 != cs) {
116055847Sbostic 			/* maybe */
116155847Sbostic 			for (i = 0; i < css; i++)
116255847Sbostic 				if (!!CHIN(cs2, i) != !!CHIN(cs, i))
116355847Sbostic 					break;		/* no */
116455847Sbostic 			if (i == css)
116555847Sbostic 				break;			/* yes */
116655847Sbostic 		}
116755847Sbostic 
116855847Sbostic 	if (cs2 < top) {	/* found one */
116960201Sbostic 		freeset(p, cs);
117055847Sbostic 		cs = cs2;
117155847Sbostic 	}
117255847Sbostic 
117355847Sbostic 	return((int)(cs - p->g->sets));
117455847Sbostic }
117555847Sbostic 
117655847Sbostic /*
117760201Sbostic  - firstch - return first character in a set (which must have at least one)
117860201Sbostic  == static int firstch(register struct parse *p, register cset *cs);
117960201Sbostic  */
118060201Sbostic static int			/* character; there is no "none" value */
firstch(p,cs)118160201Sbostic firstch(p, cs)
118260201Sbostic register struct parse *p;
118360201Sbostic register cset *cs;
118460201Sbostic {
118560201Sbostic 	register int i;
118660201Sbostic 	register size_t css = (size_t)p->g->csetsize;
118760201Sbostic 
118860201Sbostic 	for (i = 0; i < css; i++)
118960201Sbostic 		if (CHIN(cs, i))
119060201Sbostic 			return((char)i);
119160201Sbostic 	assert(never);
119260201Sbostic 	return(0);		/* arbitrary */
119360201Sbostic }
119460201Sbostic 
119560201Sbostic /*
119660201Sbostic  - nch - number of characters in a set
119760201Sbostic  == static int nch(register struct parse *p, register cset *cs);
119860201Sbostic  */
119960201Sbostic static int
nch(p,cs)120060201Sbostic nch(p, cs)
120160201Sbostic register struct parse *p;
120260201Sbostic register cset *cs;
120360201Sbostic {
120460201Sbostic 	register int i;
120560201Sbostic 	register size_t css = (size_t)p->g->csetsize;
120660201Sbostic 	register int n = 0;
120760201Sbostic 
120860201Sbostic 	for (i = 0; i < css; i++)
120960201Sbostic 		if (CHIN(cs, i))
121060201Sbostic 			n++;
121160201Sbostic 	return(n);
121260201Sbostic }
121360201Sbostic 
121460201Sbostic /*
121555847Sbostic  - mcadd - add a collating element to a cset
121660201Sbostic  == static void mcadd(register struct parse *p, register cset *cs, \
121760201Sbostic  ==	register char *cp);
121855847Sbostic  */
121955847Sbostic static void
mcadd(p,cs,cp)122055847Sbostic mcadd(p, cs, cp)
122155847Sbostic register struct parse *p;
122255847Sbostic register cset *cs;
122360201Sbostic register char *cp;
122455847Sbostic {
122555847Sbostic 	register size_t oldend = cs->smultis;
122655847Sbostic 
122760201Sbostic 	cs->smultis += strlen(cp) + 1;
122855847Sbostic 	if (cs->multis == NULL)
122960201Sbostic 		cs->multis = malloc(cs->smultis);
123055847Sbostic 	else
123160201Sbostic 		cs->multis = realloc(cs->multis, cs->smultis);
123255847Sbostic 	if (cs->multis == NULL) {
123355847Sbostic 		SETERROR(REG_ESPACE);
123455847Sbostic 		return;
123555847Sbostic 	}
123655847Sbostic 
123760201Sbostic 	(void) strcpy(cs->multis + oldend - 1, cp);
123855847Sbostic 	cs->multis[cs->smultis - 1] = '\0';
123955847Sbostic }
124055847Sbostic 
124155847Sbostic /*
124255847Sbostic  - mcsub - subtract a collating element from a cset
124360201Sbostic  == static void mcsub(register cset *cs, register char *cp);
124455847Sbostic  */
124555847Sbostic static void
mcsub(cs,cp)124660201Sbostic mcsub(cs, cp)
124755847Sbostic register cset *cs;
124860201Sbostic register char *cp;
124955847Sbostic {
125060201Sbostic 	register char *fp = mcfind(cs, cp);
125160201Sbostic 	register size_t len = strlen(fp);
125255847Sbostic 
125360201Sbostic 	assert(fp != NULL);
125460201Sbostic 	(void) memmove(fp, fp + len + 1,
125555847Sbostic 				cs->smultis - (fp + len + 1 - cs->multis));
125655847Sbostic 	cs->smultis -= len;
125755847Sbostic 
125855847Sbostic 	if (cs->smultis == 0) {
125960201Sbostic 		free(cs->multis);
126055847Sbostic 		cs->multis = NULL;
126155847Sbostic 		return;
126255847Sbostic 	}
126355847Sbostic 
126460201Sbostic 	cs->multis = realloc(cs->multis, cs->smultis);
126555847Sbostic 	assert(cs->multis != NULL);
126655847Sbostic }
126755847Sbostic 
126855847Sbostic /*
126955847Sbostic  - mcin - is a collating element in a cset?
127060201Sbostic  == static int mcin(register cset *cs, register char *cp);
127155847Sbostic  */
127255847Sbostic static int
mcin(cs,cp)127360201Sbostic mcin(cs, cp)
127455847Sbostic register cset *cs;
127560201Sbostic register char *cp;
127655847Sbostic {
127755847Sbostic 	return(mcfind(cs, cp) != NULL);
127855847Sbostic }
127955847Sbostic 
128055847Sbostic /*
128155847Sbostic  - mcfind - find a collating element in a cset
128260201Sbostic  == static char *mcfind(register cset *cs, register char *cp);
128355847Sbostic  */
128460201Sbostic static char *
mcfind(cs,cp)128555847Sbostic mcfind(cs, cp)
128655847Sbostic register cset *cs;
128760201Sbostic register char *cp;
128855847Sbostic {
128960201Sbostic 	register char *p;
129055847Sbostic 
129155847Sbostic 	if (cs->multis == NULL)
129255847Sbostic 		return(NULL);
129360201Sbostic 	for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
129460201Sbostic 		if (strcmp(cp, p) == 0)
129555847Sbostic 			return(p);
129655847Sbostic 	return(NULL);
129755847Sbostic }
129855847Sbostic 
129955847Sbostic /*
130055847Sbostic  - mcinvert - invert the list of collating elements in a cset
130166362Sbostic  == static void mcinvert(register struct parse *p, register cset *cs);
130255847Sbostic  *
130355847Sbostic  * This would have to know the set of possibilities.  Implementation
130455847Sbostic  * is deferred.
130555847Sbostic  */
130655847Sbostic static void
mcinvert(p,cs)130766362Sbostic mcinvert(p, cs)
130866362Sbostic register struct parse *p;
130955847Sbostic register cset *cs;
131055847Sbostic {
131155847Sbostic 	assert(cs->multis == NULL);	/* xxx */
131255847Sbostic }
131355847Sbostic 
131455847Sbostic /*
131560201Sbostic  - mccase - add case counterparts of the list of collating elements in a cset
131666362Sbostic  == static void mccase(register struct parse *p, register cset *cs);
131760201Sbostic  *
131860201Sbostic  * This would have to know the set of possibilities.  Implementation
131960201Sbostic  * is deferred.
132060201Sbostic  */
132160201Sbostic static void
mccase(p,cs)132266362Sbostic mccase(p, cs)
132366362Sbostic register struct parse *p;
132460201Sbostic register cset *cs;
132560201Sbostic {
132660201Sbostic 	assert(cs->multis == NULL);	/* xxx */
132760201Sbostic }
132860201Sbostic 
132960201Sbostic /*
133055847Sbostic  - isinsets - is this character in any sets?
133160201Sbostic  == static int isinsets(register struct re_guts *g, int c);
133255847Sbostic  */
133355847Sbostic static int			/* predicate */
isinsets(g,c)133455847Sbostic isinsets(g, c)
133555847Sbostic register struct re_guts *g;
133660201Sbostic int c;
133755847Sbostic {
133866362Sbostic 	register uch *col;
133955847Sbostic 	register int i;
134055847Sbostic 	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
134160201Sbostic 	register unsigned uc = (unsigned char)c;
134255847Sbostic 
134355847Sbostic 	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
134460201Sbostic 		if (col[uc] != 0)
134555847Sbostic 			return(1);
134655847Sbostic 	return(0);
134755847Sbostic }
134855847Sbostic 
134955847Sbostic /*
135055847Sbostic  - samesets - are these two characters in exactly the same sets?
135160201Sbostic  == static int samesets(register struct re_guts *g, int c1, int c2);
135255847Sbostic  */
135355847Sbostic static int			/* predicate */
samesets(g,c1,c2)135455847Sbostic samesets(g, c1, c2)
135555847Sbostic register struct re_guts *g;
135660201Sbostic int c1;
135760201Sbostic int c2;
135855847Sbostic {
135966362Sbostic 	register uch *col;
136055847Sbostic 	register int i;
136155847Sbostic 	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
136260201Sbostic 	register unsigned uc1 = (unsigned char)c1;
136360201Sbostic 	register unsigned uc2 = (unsigned char)c2;
136455847Sbostic 
136555847Sbostic 	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
136660201Sbostic 		if (col[uc1] != col[uc2])
136755847Sbostic 			return(0);
136855847Sbostic 	return(1);
136955847Sbostic }
137055847Sbostic 
137155847Sbostic /*
137255847Sbostic  - categorize - sort out character categories
137360201Sbostic  == static void categorize(struct parse *p, register struct re_guts *g);
137455847Sbostic  */
137555847Sbostic static void
categorize(p,g)137655847Sbostic categorize(p, g)
137755847Sbostic struct parse *p;
137855847Sbostic register struct re_guts *g;
137955847Sbostic {
138060201Sbostic 	register cat_t *cats = g->categories;
138160201Sbostic 	register int c;
138260201Sbostic 	register int c2;
138360201Sbostic 	register cat_t cat;
138455847Sbostic 
138555847Sbostic 	/* avoid making error situations worse */
138655847Sbostic 	if (p->error != 0)
138755847Sbostic 		return;
138855847Sbostic 
138960201Sbostic 	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
139055847Sbostic 		if (cats[c] == 0 && isinsets(g, c)) {
139155847Sbostic 			cat = g->ncategories++;
139255847Sbostic 			cats[c] = cat;
139360201Sbostic 			for (c2 = c+1; c2 <= CHAR_MAX; c2++)
139455847Sbostic 				if (cats[c2] == 0 && samesets(g, c, c2))
139555847Sbostic 					cats[c2] = cat;
139655847Sbostic 		}
139755847Sbostic }
139855847Sbostic 
139955847Sbostic /*
140055847Sbostic  - dupl - emit a duplicate of a bunch of sops
140160201Sbostic  == static sopno dupl(register struct parse *p, sopno start, sopno finish);
140255847Sbostic  */
140355847Sbostic static sopno			/* start of duplicate */
dupl(p,start,finish)140455847Sbostic dupl(p, start, finish)
140555847Sbostic register struct parse *p;
140655847Sbostic sopno start;			/* from here */
140755847Sbostic sopno finish;			/* to this less one */
140855847Sbostic {
140955847Sbostic 	register sopno ret = HERE();
141055847Sbostic 	register sopno len = finish - start;
141155847Sbostic 
141255847Sbostic 	assert(finish >= start);
141355847Sbostic 	if (len == 0)
141455847Sbostic 		return(ret);
141555847Sbostic 	enlarge(p, p->ssize + len);	/* this many unexpected additions */
141655847Sbostic 	assert(p->ssize >= p->slen + len);
141755847Sbostic 	(void) memcpy((char *)(p->strip + p->slen),
141855847Sbostic 		(char *)(p->strip + start), (size_t)len*sizeof(sop));
141955847Sbostic 	p->slen += len;
142055847Sbostic 	return(ret);
142155847Sbostic }
142255847Sbostic 
142355847Sbostic /*
142455847Sbostic  - doemit - emit a strip operator
142560201Sbostic  == static void doemit(register struct parse *p, sop op, size_t opnd);
142655847Sbostic  *
142755847Sbostic  * It might seem better to implement this as a macro with a function as
142855847Sbostic  * hard-case backup, but it's just too big and messy unless there are
142955847Sbostic  * some changes to the data structures.  Maybe later.
143055847Sbostic  */
143155847Sbostic static void
doemit(p,op,opnd)143255847Sbostic doemit(p, op, opnd)
143355847Sbostic register struct parse *p;
143455847Sbostic sop op;
143555847Sbostic size_t opnd;
143655847Sbostic {
143755847Sbostic 	/* avoid making error situations worse */
143855847Sbostic 	if (p->error != 0)
143955847Sbostic 		return;
144055847Sbostic 
144155847Sbostic 	/* deal with oversize operands ("can't happen", more or less) */
144255847Sbostic 	assert(opnd < 1<<OPSHIFT);
144355847Sbostic 
144455847Sbostic 	/* deal with undersized strip */
144555847Sbostic 	if (p->slen >= p->ssize)
144655847Sbostic 		enlarge(p, (p->ssize+1) / 2 * 3);	/* +50% */
144755847Sbostic 	assert(p->slen < p->ssize);
144855847Sbostic 
144955847Sbostic 	/* finally, it's all reduced to the easy case */
145055847Sbostic 	p->strip[p->slen++] = SOP(op, opnd);
145155847Sbostic }
145255847Sbostic 
145355847Sbostic /*
145455847Sbostic  - doinsert - insert a sop into the strip
145560201Sbostic  == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
145655847Sbostic  */
145755847Sbostic static void
doinsert(p,op,opnd,pos)145855847Sbostic doinsert(p, op, opnd, pos)
145955847Sbostic register struct parse *p;
146055847Sbostic sop op;
146155847Sbostic size_t opnd;
146255847Sbostic sopno pos;
146355847Sbostic {
146455847Sbostic 	register sopno sn;
146555847Sbostic 	register sop s;
146655847Sbostic 	register int i;
146755847Sbostic 
146855847Sbostic 	/* avoid making error situations worse */
146955847Sbostic 	if (p->error != 0)
147055847Sbostic 		return;
147155847Sbostic 
147255847Sbostic 	sn = HERE();
147355847Sbostic 	EMIT(op, opnd);		/* do checks, ensure space */
147455847Sbostic 	assert(HERE() == sn+1);
147555847Sbostic 	s = p->strip[sn];
147655847Sbostic 
147755847Sbostic 	/* adjust paren pointers */
147855847Sbostic 	assert(pos > 0);
147955847Sbostic 	for (i = 1; i < NPAREN; i++) {
148055847Sbostic 		if (p->pbegin[i] >= pos) {
148155847Sbostic 			p->pbegin[i]++;
148255847Sbostic 		}
148355847Sbostic 		if (p->pend[i] >= pos) {
148455847Sbostic 			p->pend[i]++;
148555847Sbostic 		}
148655847Sbostic 	}
148755847Sbostic 
148855847Sbostic 	memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
148955847Sbostic 						(HERE()-pos-1)*sizeof(sop));
149055847Sbostic 	p->strip[pos] = s;
149155847Sbostic }
149255847Sbostic 
149355847Sbostic /*
149455847Sbostic  - dofwd - complete a forward reference
149560201Sbostic  == static void dofwd(register struct parse *p, sopno pos, sop value);
149655847Sbostic  */
149755847Sbostic static void
dofwd(p,pos,value)149855847Sbostic dofwd(p, pos, value)
149955847Sbostic register struct parse *p;
150055847Sbostic register sopno pos;
150155847Sbostic sop value;
150255847Sbostic {
150355847Sbostic 	/* avoid making error situations worse */
150455847Sbostic 	if (p->error != 0)
150555847Sbostic 		return;
150655847Sbostic 
150755847Sbostic 	assert(value < 1<<OPSHIFT);
150855847Sbostic 	p->strip[pos] = OP(p->strip[pos]) | value;
150955847Sbostic }
151055847Sbostic 
151155847Sbostic /*
151255847Sbostic  - enlarge - enlarge the strip
151360201Sbostic  == static void enlarge(register struct parse *p, sopno size);
151455847Sbostic  */
151555847Sbostic static void
enlarge(p,size)151655847Sbostic enlarge(p, size)
151755847Sbostic register struct parse *p;
151855847Sbostic register sopno size;
151955847Sbostic {
152055847Sbostic 	register sop *sp;
152155847Sbostic 
152255847Sbostic 	if (p->ssize >= size)
152355847Sbostic 		return;
152455847Sbostic 
152555847Sbostic 	sp = (sop *)realloc(p->strip, size*sizeof(sop));
152655847Sbostic 	if (sp == NULL) {
152755847Sbostic 		SETERROR(REG_ESPACE);
152855847Sbostic 		return;
152955847Sbostic 	}
153055847Sbostic 	p->strip = sp;
153155847Sbostic 	p->ssize = size;
153255847Sbostic }
153355847Sbostic 
153455847Sbostic /*
153555847Sbostic  - stripsnug - compact the strip
153660201Sbostic  == static void stripsnug(register struct parse *p, register struct re_guts *g);
153755847Sbostic  */
153855847Sbostic static void
stripsnug(p,g)153955847Sbostic stripsnug(p, g)
154055847Sbostic register struct parse *p;
154155847Sbostic register struct re_guts *g;
154255847Sbostic {
154355847Sbostic 	g->nstates = p->slen;
154466362Sbostic 	g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
154555847Sbostic 	if (g->strip == NULL) {
154655847Sbostic 		SETERROR(REG_ESPACE);
154755847Sbostic 		g->strip = p->strip;
154855847Sbostic 	}
154955847Sbostic }
155055847Sbostic 
155155847Sbostic /*
155255847Sbostic  - findmust - fill in must and mlen with longest mandatory literal string
155360201Sbostic  == static void findmust(register struct parse *p, register struct re_guts *g);
155455847Sbostic  *
155555847Sbostic  * This algorithm could do fancy things like analyzing the operands of |
155655847Sbostic  * for common subsequences.  Someday.  This code is simple and finds most
155755847Sbostic  * of the interesting cases.
155855847Sbostic  *
155955847Sbostic  * Note that must and mlen got initialized during setup.
156055847Sbostic  */
156156355Sbostic static void
findmust(p,g)156255847Sbostic findmust(p, g)
156355847Sbostic struct parse *p;
156455847Sbostic register struct re_guts *g;
156555847Sbostic {
156655847Sbostic 	register sop *scan;
156755847Sbostic 	sop *start;
156855847Sbostic 	register sop *newstart;
156955847Sbostic 	register sopno newlen;
157055847Sbostic 	register sop s;
157155847Sbostic 	register char *cp;
157255847Sbostic 	register sopno i;
157355847Sbostic 
157455847Sbostic 	/* avoid making error situations worse */
157555847Sbostic 	if (p->error != 0)
157655847Sbostic 		return;
157755847Sbostic 
157855847Sbostic 	/* find the longest OCHAR sequence in strip */
157955847Sbostic 	newlen = 0;
158055847Sbostic 	scan = g->strip + 1;
158155847Sbostic 	do {
158255847Sbostic 		s = *scan++;
158355847Sbostic 		switch (OP(s)) {
158455847Sbostic 		case OCHAR:		/* sequence member */
158555847Sbostic 			if (newlen == 0)		/* new sequence */
158655847Sbostic 				newstart = scan - 1;
158755847Sbostic 			newlen++;
158855847Sbostic 			break;
158955847Sbostic 		case OPLUS_:		/* things that don't break one */
159055847Sbostic 		case OLPAREN:
159155847Sbostic 		case ORPAREN:
159255847Sbostic 			break;
159355847Sbostic 		case OQUEST_:		/* things that must be skipped */
159455847Sbostic 		case OCH_:
159555847Sbostic 			scan--;
159655847Sbostic 			do {
159755847Sbostic 				scan += OPND(s);
159855847Sbostic 				s = *scan;
159955847Sbostic 				/* assert() interferes w debug printouts */
160055847Sbostic 				if (OP(s) != O_QUEST && OP(s) != O_CH &&
160155847Sbostic 							OP(s) != OOR2) {
160255847Sbostic 					g->iflags |= BAD;
160355847Sbostic 					return;
160455847Sbostic 				}
160555847Sbostic 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
160655847Sbostic 			/* fallthrough */
160755847Sbostic 		default:		/* things that break a sequence */
160855847Sbostic 			if (newlen > g->mlen) {		/* ends one */
160955847Sbostic 				start = newstart;
161055847Sbostic 				g->mlen = newlen;
161155847Sbostic 			}
161255847Sbostic 			newlen = 0;
161355847Sbostic 			break;
161455847Sbostic 		}
161555847Sbostic 	} while (OP(s) != OEND);
161655847Sbostic 
161755847Sbostic 	if (g->mlen == 0)		/* there isn't one */
161855847Sbostic 		return;
161955847Sbostic 
162055847Sbostic 	/* turn it into a character string */
162155847Sbostic 	g->must = malloc((size_t)g->mlen + 1);
162255847Sbostic 	if (g->must == NULL) {		/* argh; just forget it */
162355847Sbostic 		g->mlen = 0;
162455847Sbostic 		return;
162555847Sbostic 	}
162655847Sbostic 	cp = g->must;
162755847Sbostic 	scan = start;
162855847Sbostic 	for (i = g->mlen; i > 0; i--) {
162955847Sbostic 		while (OP(s = *scan++) != OCHAR)
163055847Sbostic 			continue;
163166362Sbostic 		assert(cp < g->must + g->mlen);
163260201Sbostic 		*cp++ = (char)OPND(s);
163355847Sbostic 	}
163466362Sbostic 	assert(cp == g->must + g->mlen);
163555847Sbostic 	*cp++ = '\0';		/* just on general principles */
163655847Sbostic }
163755847Sbostic 
163855847Sbostic /*
163955847Sbostic  - pluscount - count + nesting
164060201Sbostic  == static sopno pluscount(register struct parse *p, register struct re_guts *g);
164155847Sbostic  */
164256355Sbostic static sopno			/* nesting depth */
pluscount(p,g)164355847Sbostic pluscount(p, g)
164455847Sbostic struct parse *p;
164555847Sbostic register struct re_guts *g;
164655847Sbostic {
164755847Sbostic 	register sop *scan;
164855847Sbostic 	register sop s;
164955847Sbostic 	register sopno plusnest = 0;
165055847Sbostic 	register sopno maxnest = 0;
165155847Sbostic 
165255847Sbostic 	if (p->error != 0)
165355847Sbostic 		return(0);	/* there may not be an OEND */
165455847Sbostic 
165555847Sbostic 	scan = g->strip + 1;
165655847Sbostic 	do {
165755847Sbostic 		s = *scan++;
165855847Sbostic 		switch (OP(s)) {
165955847Sbostic 		case OPLUS_:
166055847Sbostic 			plusnest++;
166155847Sbostic 			break;
166255847Sbostic 		case O_PLUS:
166355847Sbostic 			if (plusnest > maxnest)
166455847Sbostic 				maxnest = plusnest;
166555847Sbostic 			plusnest--;
166655847Sbostic 			break;
166755847Sbostic 		}
166855847Sbostic 	} while (OP(s) != OEND);
166955847Sbostic 	if (plusnest != 0)
167055847Sbostic 		g->iflags |= BAD;
167155847Sbostic 	return(maxnest);
167255847Sbostic }
1673