xref: /minix3/lib/libc/regex/regcomp.c (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1*0a6a1f1dSLionel Sambuc /*	$NetBSD: regcomp.c,v 1.36 2015/09/12 19:08:47 christos Exp $	*/
22fe8fb19SBen Gras 
3b7061124SArun Thomas /*-
4b7061124SArun Thomas  * Copyright (c) 1992, 1993, 1994
5b7061124SArun Thomas  *	The Regents of the University of California.  All rights reserved.
6b7061124SArun Thomas  *
7b7061124SArun Thomas  * This code is derived from software contributed to Berkeley by
8b7061124SArun Thomas  * Henry Spencer.
9b7061124SArun Thomas  *
10b7061124SArun Thomas  * Redistribution and use in source and binary forms, with or without
11b7061124SArun Thomas  * modification, are permitted provided that the following conditions
12b7061124SArun Thomas  * are met:
13b7061124SArun Thomas  * 1. Redistributions of source code must retain the above copyright
14b7061124SArun Thomas  *    notice, this list of conditions and the following disclaimer.
15b7061124SArun Thomas  * 2. Redistributions in binary form must reproduce the above copyright
16b7061124SArun Thomas  *    notice, this list of conditions and the following disclaimer in the
17b7061124SArun Thomas  *    documentation and/or other materials provided with the distribution.
182fe8fb19SBen Gras  * 3. Neither the name of the University nor the names of its contributors
192fe8fb19SBen Gras  *    may be used to endorse or promote products derived from this software
202fe8fb19SBen Gras  *    without specific prior written permission.
212fe8fb19SBen Gras  *
222fe8fb19SBen Gras  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
232fe8fb19SBen Gras  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
242fe8fb19SBen Gras  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
252fe8fb19SBen Gras  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
262fe8fb19SBen Gras  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
272fe8fb19SBen Gras  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
282fe8fb19SBen Gras  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
292fe8fb19SBen Gras  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
302fe8fb19SBen Gras  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
312fe8fb19SBen Gras  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
322fe8fb19SBen Gras  * SUCH DAMAGE.
332fe8fb19SBen Gras  *
342fe8fb19SBen Gras  *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
352fe8fb19SBen Gras  */
362fe8fb19SBen Gras 
372fe8fb19SBen Gras /*-
382fe8fb19SBen Gras  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
392fe8fb19SBen Gras  *
402fe8fb19SBen Gras  * This code is derived from software contributed to Berkeley by
412fe8fb19SBen Gras  * Henry Spencer.
422fe8fb19SBen Gras  *
432fe8fb19SBen Gras  * Redistribution and use in source and binary forms, with or without
442fe8fb19SBen Gras  * modification, are permitted provided that the following conditions
452fe8fb19SBen Gras  * are met:
462fe8fb19SBen Gras  * 1. Redistributions of source code must retain the above copyright
472fe8fb19SBen Gras  *    notice, this list of conditions and the following disclaimer.
482fe8fb19SBen Gras  * 2. Redistributions in binary form must reproduce the above copyright
492fe8fb19SBen Gras  *    notice, this list of conditions and the following disclaimer in the
502fe8fb19SBen Gras  *    documentation and/or other materials provided with the distribution.
51b7061124SArun Thomas  * 3. All advertising materials mentioning features or use of this software
52b7061124SArun Thomas  *    must display the following acknowledgement:
53b7061124SArun Thomas  *	This product includes software developed by the University of
54b7061124SArun Thomas  *	California, Berkeley and its contributors.
55b7061124SArun Thomas  * 4. Neither the name of the University nor the names of its contributors
56b7061124SArun Thomas  *    may be used to endorse or promote products derived from this software
57b7061124SArun Thomas  *    without specific prior written permission.
58b7061124SArun Thomas  *
59b7061124SArun Thomas  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60b7061124SArun Thomas  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61b7061124SArun Thomas  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62b7061124SArun Thomas  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63b7061124SArun Thomas  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64b7061124SArun Thomas  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65b7061124SArun Thomas  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66b7061124SArun Thomas  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67b7061124SArun Thomas  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68b7061124SArun Thomas  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69b7061124SArun Thomas  * SUCH DAMAGE.
70b7061124SArun Thomas  *
71b7061124SArun Thomas  *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
72b7061124SArun Thomas  */
73b7061124SArun Thomas 
742fe8fb19SBen Gras #include <sys/cdefs.h>
75b7061124SArun Thomas #if defined(LIBC_SCCS) && !defined(lint)
762fe8fb19SBen Gras #if 0
77b7061124SArun Thomas static char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 3/20/94";
782fe8fb19SBen Gras #else
79*0a6a1f1dSLionel Sambuc __RCSID("$NetBSD: regcomp.c,v 1.36 2015/09/12 19:08:47 christos Exp $");
802fe8fb19SBen Gras #endif
81b7061124SArun Thomas #endif /* LIBC_SCCS and not lint */
82b7061124SArun Thomas 
832fe8fb19SBen Gras #include "namespace.h"
84b7061124SArun Thomas #include <sys/types.h>
852fe8fb19SBen Gras 
862fe8fb19SBen Gras #include <assert.h>
87b7061124SArun Thomas #include <ctype.h>
88b7061124SArun Thomas #include <limits.h>
892fe8fb19SBen Gras #include <stdio.h>
90b7061124SArun Thomas #include <stdlib.h>
912fe8fb19SBen Gras #include <string.h>
92b7061124SArun Thomas #include <regex.h>
93b7061124SArun Thomas 
942fe8fb19SBen Gras #ifdef __weak_alias
952fe8fb19SBen Gras __weak_alias(regcomp,_regcomp)
962fe8fb19SBen Gras #endif
972fe8fb19SBen Gras 
98b7061124SArun Thomas #include "utils.h"
99b7061124SArun Thomas #include "regex2.h"
100b7061124SArun Thomas 
101b7061124SArun Thomas #include "cclass.h"
102b7061124SArun Thomas #include "cname.h"
103b7061124SArun Thomas 
104b7061124SArun Thomas /*
105b7061124SArun Thomas  * parse structure, passed up and down to avoid global variables and
106b7061124SArun Thomas  * other clumsinesses
107b7061124SArun Thomas  */
108b7061124SArun Thomas struct parse {
1092fe8fb19SBen Gras 	const char *next;	/* next character in RE */
1102fe8fb19SBen Gras 	const char *end;	/* end of string (-> NUL normally) */
111b7061124SArun Thomas 	int error;		/* has an error been seen? */
112b7061124SArun Thomas 	sop *strip;		/* malloced strip */
113b7061124SArun Thomas 	sopno ssize;		/* malloced strip size (allocated) */
114b7061124SArun Thomas 	sopno slen;		/* malloced strip length (used) */
115f14fb602SLionel Sambuc 	size_t ncsalloc;	/* number of csets allocated */
116b7061124SArun Thomas 	struct re_guts *g;
117b7061124SArun Thomas #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
118b7061124SArun Thomas 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
119b7061124SArun Thomas 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
120b7061124SArun Thomas };
121b7061124SArun Thomas 
122b7061124SArun Thomas /* ========= begin header generated by ./mkh ========= */
123b7061124SArun Thomas #ifdef __cplusplus
124b7061124SArun Thomas extern "C" {
125b7061124SArun Thomas #endif
126b7061124SArun Thomas 
127b7061124SArun Thomas /* === regcomp.c === */
128f14fb602SLionel Sambuc static void p_ere(struct parse *p, int stop, size_t reclimit);
129f14fb602SLionel Sambuc static void p_ere_exp(struct parse *p, size_t reclimit);
130b7061124SArun Thomas static void p_str(struct parse *p);
131f14fb602SLionel Sambuc static void p_bre(struct parse *p, int end1, int end2, size_t reclimit);
132f14fb602SLionel Sambuc static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
133b7061124SArun Thomas static int p_count(struct parse *p);
134b7061124SArun Thomas static void p_bracket(struct parse *p);
135b7061124SArun Thomas static void p_b_term(struct parse *p, cset *cs);
136b7061124SArun Thomas static void p_b_cclass(struct parse *p, cset *cs);
137b7061124SArun Thomas static void p_b_eclass(struct parse *p, cset *cs);
138b7061124SArun Thomas static char p_b_symbol(struct parse *p);
139b7061124SArun Thomas static char p_b_coll_elem(struct parse *p, int endc);
1402fe8fb19SBen Gras static int othercase(int ch);
141b7061124SArun Thomas static void bothcases(struct parse *p, int ch);
142b7061124SArun Thomas static void ordinary(struct parse *p, int ch);
143b7061124SArun Thomas static void nonnewline(struct parse *p);
144f14fb602SLionel Sambuc static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit);
145b7061124SArun Thomas static int seterr(struct parse *p, int e);
146b7061124SArun Thomas static cset *allocset(struct parse *p);
147b7061124SArun Thomas static void freeset(struct parse *p, cset *cs);
148f14fb602SLionel Sambuc static sopno freezeset(struct parse *p, cset *cs);
149b7061124SArun Thomas static int firstch(struct parse *p, cset *cs);
150b7061124SArun Thomas static int nch(struct parse *p, cset *cs);
1512fe8fb19SBen Gras static void mcadd(struct parse *p, cset *cs, const char *cp);
1522fe8fb19SBen Gras #if 0
1532fe8fb19SBen Gras static void mcsub(cset *cs, char *cp);
1542fe8fb19SBen Gras static int mcin(cset *cs, char *cp);
1552fe8fb19SBen Gras static char *mcfind(cset *cs, char *cp);
1562fe8fb19SBen Gras #endif
157b7061124SArun Thomas static void mcinvert(struct parse *p, cset *cs);
158b7061124SArun Thomas static void mccase(struct parse *p, cset *cs);
159b7061124SArun Thomas static int isinsets(struct re_guts *g, int c);
160b7061124SArun Thomas static int samesets(struct re_guts *g, int c1, int c2);
161b7061124SArun Thomas static void categorize(struct parse *p, struct re_guts *g);
162b7061124SArun Thomas static sopno dupl(struct parse *p, sopno start, sopno finish);
1632fe8fb19SBen Gras static void doemit(struct parse *p, sop op, sopno opnd);
1642fe8fb19SBen Gras static void doinsert(struct parse *p, sop op, sopno opnd, sopno pos);
1652fe8fb19SBen Gras static void dofwd(struct parse *p, sopno pos, sopno value);
166f14fb602SLionel Sambuc static int enlarge(struct parse *p, sopno size);
167b7061124SArun Thomas static void stripsnug(struct parse *p, struct re_guts *g);
168b7061124SArun Thomas static void findmust(struct parse *p, struct re_guts *g);
169b7061124SArun Thomas static sopno pluscount(struct parse *p, struct re_guts *g);
170b7061124SArun Thomas 
171b7061124SArun Thomas #ifdef __cplusplus
172b7061124SArun Thomas }
173b7061124SArun Thomas #endif
174b7061124SArun Thomas /* ========= end header generated by ./mkh ========= */
175b7061124SArun Thomas 
176b7061124SArun Thomas static char nuls[10];		/* place to point scanner in event of error */
177b7061124SArun Thomas 
178b7061124SArun Thomas /*
179b7061124SArun Thomas  * macros for use with parse structure
180b7061124SArun Thomas  * BEWARE:  these know that the parse structure is named `p' !!!
181b7061124SArun Thomas  */
182b7061124SArun Thomas #define	PEEK()	(*p->next)
183b7061124SArun Thomas #define	PEEK2()	(*(p->next+1))
184b7061124SArun Thomas #define	MORE()	(p->next < p->end)
185b7061124SArun Thomas #define	MORE2()	(p->next+1 < p->end)
186b7061124SArun Thomas #define	SEE(c)	(MORE() && PEEK() == (c))
187b7061124SArun Thomas #define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
188b7061124SArun Thomas #define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
189b7061124SArun Thomas #define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
190b7061124SArun Thomas #define	NEXT()	(p->next++)
191b7061124SArun Thomas #define	NEXT2()	(p->next += 2)
192b7061124SArun Thomas #define	NEXTn(n)	(p->next += (n))
193b7061124SArun Thomas #define	GETNEXT()	(*p->next++)
194b7061124SArun Thomas #define	SETERROR(e)	seterr(p, (e))
1952fe8fb19SBen Gras #define	REQUIRE(co, e)	(void) ((co) || SETERROR(e))
196b7061124SArun Thomas #define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
1972fe8fb19SBen Gras #define	MUSTEAT(c, e)	(void) (REQUIRE(MORE() && GETNEXT() == (c), e))
198b7061124SArun Thomas #define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
1992fe8fb19SBen Gras #define	EMIT(op, sopnd)	doemit(p, (sop)(op), sopnd)
200b7061124SArun Thomas #define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
201b7061124SArun Thomas #define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
202b7061124SArun Thomas #define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
203b7061124SArun Thomas #define	HERE()		(p->slen)
204b7061124SArun Thomas #define	THERE()		(p->slen - 1)
205b7061124SArun Thomas #define	THERETHERE()	(p->slen - 2)
206b7061124SArun Thomas #define	DROP(n)	(p->slen -= (n))
207b7061124SArun Thomas 
208b7061124SArun Thomas #ifndef NDEBUG
209b7061124SArun Thomas static int never = 0;		/* for use in asserts; shuts lint up */
210b7061124SArun Thomas #else
211b7061124SArun Thomas #define	never	0		/* some <assert.h>s have bugs too */
212b7061124SArun Thomas #endif
213b7061124SArun Thomas 
214f14fb602SLionel Sambuc #define	MEMLIMIT	0x8000000
215f14fb602SLionel Sambuc #define MEMSIZE(p) \
216f14fb602SLionel Sambuc 	((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
217f14fb602SLionel Sambuc 	(p)->ncsalloc * sizeof(cset) + \
218f14fb602SLionel Sambuc 	(p)->ssize * sizeof(sop))
219f14fb602SLionel Sambuc #define	RECLIMIT	256
220f14fb602SLionel Sambuc 
221b7061124SArun Thomas /*
222b7061124SArun Thomas  - regcomp - interface for parser and compilation
223b7061124SArun Thomas  = extern int regcomp(regex_t *, const char *, int);
224b7061124SArun Thomas  = #define	REG_BASIC	0000
225b7061124SArun Thomas  = #define	REG_EXTENDED	0001
226b7061124SArun Thomas  = #define	REG_ICASE	0002
227b7061124SArun Thomas  = #define	REG_NOSUB	0004
228b7061124SArun Thomas  = #define	REG_NEWLINE	0010
229b7061124SArun Thomas  = #define	REG_NOSPEC	0020
230b7061124SArun Thomas  = #define	REG_PEND	0040
231b7061124SArun Thomas  = #define	REG_DUMP	0200
232b7061124SArun Thomas  */
233b7061124SArun Thomas int				/* 0 success, otherwise REG_something */
regcomp(regex_t * preg,const char * pattern,int cflags)2342fe8fb19SBen Gras regcomp(
2352fe8fb19SBen Gras     regex_t *preg,
2362fe8fb19SBen Gras     const char *pattern,
2372fe8fb19SBen Gras     int cflags)
238b7061124SArun Thomas {
239b7061124SArun Thomas 	struct parse pa;
2402fe8fb19SBen Gras 	struct re_guts *g;
2412fe8fb19SBen Gras 	struct parse *p = &pa;
2422fe8fb19SBen Gras 	int i;
2432fe8fb19SBen Gras 	size_t len;
244b7061124SArun Thomas #ifdef REDEBUG
245b7061124SArun Thomas #	define	GOODFLAGS(f)	(f)
246b7061124SArun Thomas #else
247b7061124SArun Thomas #	define	GOODFLAGS(f)	((f)&~REG_DUMP)
248b7061124SArun Thomas #endif
249b7061124SArun Thomas 
2502fe8fb19SBen Gras 	_DIAGASSERT(preg != NULL);
2512fe8fb19SBen Gras 	_DIAGASSERT(pattern != NULL);
2522fe8fb19SBen Gras 
253b7061124SArun Thomas 	cflags = GOODFLAGS(cflags);
254b7061124SArun Thomas 	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
255b7061124SArun Thomas 		return(REG_INVARG);
256b7061124SArun Thomas 
257b7061124SArun Thomas 	if (cflags&REG_PEND) {
258b7061124SArun Thomas 		if (preg->re_endp < pattern)
259b7061124SArun Thomas 			return(REG_INVARG);
260b7061124SArun Thomas 		len = preg->re_endp - pattern;
261b7061124SArun Thomas 	} else
2622fe8fb19SBen Gras 		len = strlen(pattern);
263b7061124SArun Thomas 
264b7061124SArun Thomas 	/* do the mallocs early so failure handling is easy */
265*0a6a1f1dSLionel Sambuc 	g = malloc(sizeof(struct re_guts) + (NC - 1) * sizeof(cat_t));
266b7061124SArun Thomas 	if (g == NULL)
267b7061124SArun Thomas 		return(REG_ESPACE);
268b7061124SArun Thomas 	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
269*0a6a1f1dSLionel Sambuc 	p->strip = calloc(p->ssize, sizeof(sop));
270b7061124SArun Thomas 	p->slen = 0;
271b7061124SArun Thomas 	if (p->strip == NULL) {
2722fe8fb19SBen Gras 		free(g);
273b7061124SArun Thomas 		return(REG_ESPACE);
274b7061124SArun Thomas 	}
275b7061124SArun Thomas 
276b7061124SArun Thomas 	/* set things up */
277b7061124SArun Thomas 	p->g = g;
2782fe8fb19SBen Gras 	p->next = pattern;
279b7061124SArun Thomas 	p->end = p->next + len;
280b7061124SArun Thomas 	p->error = 0;
281b7061124SArun Thomas 	p->ncsalloc = 0;
282b7061124SArun Thomas 	for (i = 0; i < NPAREN; i++) {
283b7061124SArun Thomas 		p->pbegin[i] = 0;
284b7061124SArun Thomas 		p->pend[i] = 0;
285b7061124SArun Thomas 	}
286b7061124SArun Thomas 	g->csetsize = NC;
287b7061124SArun Thomas 	g->sets = NULL;
288b7061124SArun Thomas 	g->setbits = NULL;
289b7061124SArun Thomas 	g->ncsets = 0;
290b7061124SArun Thomas 	g->cflags = cflags;
291b7061124SArun Thomas 	g->iflags = 0;
292b7061124SArun Thomas 	g->nbol = 0;
293b7061124SArun Thomas 	g->neol = 0;
294b7061124SArun Thomas 	g->must = NULL;
295b7061124SArun Thomas 	g->mlen = 0;
296b7061124SArun Thomas 	g->nsub = 0;
297b7061124SArun Thomas 	g->ncategories = 1;	/* category 0 is "everything else" */
298b7061124SArun Thomas 	g->categories = &g->catspace[-(CHAR_MIN)];
299b7061124SArun Thomas 	(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
300b7061124SArun Thomas 	g->backrefs = 0;
301b7061124SArun Thomas 
302b7061124SArun Thomas 	/* do it */
303b7061124SArun Thomas 	EMIT(OEND, 0);
304b7061124SArun Thomas 	g->firststate = THERE();
305b7061124SArun Thomas 	if (cflags&REG_EXTENDED)
306f14fb602SLionel Sambuc 		p_ere(p, OUT, 0);
307b7061124SArun Thomas 	else if (cflags&REG_NOSPEC)
308b7061124SArun Thomas 		p_str(p);
309b7061124SArun Thomas 	else
310f14fb602SLionel Sambuc 		p_bre(p, OUT, OUT, 0);
311b7061124SArun Thomas 	EMIT(OEND, 0);
312b7061124SArun Thomas 	g->laststate = THERE();
313b7061124SArun Thomas 
314b7061124SArun Thomas 	/* tidy up loose ends and fill things in */
315b7061124SArun Thomas 	categorize(p, g);
316b7061124SArun Thomas 	stripsnug(p, g);
317b7061124SArun Thomas 	findmust(p, g);
318b7061124SArun Thomas 	g->nplus = pluscount(p, g);
319b7061124SArun Thomas 	g->magic = MAGIC2;
320b7061124SArun Thomas 	preg->re_nsub = g->nsub;
321b7061124SArun Thomas 	preg->re_g = g;
322b7061124SArun Thomas 	preg->re_magic = MAGIC1;
323b7061124SArun Thomas #ifndef REDEBUG
324b7061124SArun Thomas 	/* not debugging, so can't rely on the assert() in regexec() */
325b7061124SArun Thomas 	if (g->iflags&BAD)
326b7061124SArun Thomas 		SETERROR(REG_ASSERT);
327b7061124SArun Thomas #endif
328b7061124SArun Thomas 
329b7061124SArun Thomas 	/* win or lose, we're done */
330b7061124SArun Thomas 	if (p->error != 0)	/* lose */
331b7061124SArun Thomas 		regfree(preg);
332b7061124SArun Thomas 	return(p->error);
333b7061124SArun Thomas }
334b7061124SArun Thomas 
335b7061124SArun Thomas /*
336b7061124SArun Thomas  - p_ere - ERE parser top level, concatenation and alternation
337f14fb602SLionel Sambuc  == static void p_ere(struct parse *p, int stop, size_t reclimit);
338b7061124SArun Thomas  */
339b7061124SArun Thomas static void
p_ere(struct parse * p,int stop,size_t reclimit)3402fe8fb19SBen Gras p_ere(
3412fe8fb19SBen Gras     struct parse *p,
342f14fb602SLionel Sambuc     int stop,			/* character this ERE should end at */
343f14fb602SLionel Sambuc     size_t reclimit)
344b7061124SArun Thomas {
3452fe8fb19SBen Gras 	char c;
3462fe8fb19SBen Gras 	sopno prevback = 0;	/* pacify gcc */
3472fe8fb19SBen Gras 	sopno prevfwd = 0; 	/* pacify gcc */
3482fe8fb19SBen Gras 	sopno conc;
3492fe8fb19SBen Gras 	int first = 1;		/* is this the first alternative? */
3502fe8fb19SBen Gras 
3512fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
352b7061124SArun Thomas 
353f14fb602SLionel Sambuc 	if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
354f14fb602SLionel Sambuc 		p->error = REG_ESPACE;
355f14fb602SLionel Sambuc 		return;
356f14fb602SLionel Sambuc 	}
357f14fb602SLionel Sambuc 
358b7061124SArun Thomas 	for (;;) {
359b7061124SArun Thomas 		/* do a bunch of concatenated expressions */
360b7061124SArun Thomas 		conc = HERE();
361b7061124SArun Thomas 		while (MORE() && (c = PEEK()) != '|' && c != stop)
362f14fb602SLionel Sambuc 			p_ere_exp(p, reclimit);
363b7061124SArun Thomas 		REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
364b7061124SArun Thomas 
365b7061124SArun Thomas 		if (!EAT('|'))
366b7061124SArun Thomas 			break;		/* NOTE BREAK OUT */
367b7061124SArun Thomas 
368b7061124SArun Thomas 		if (first) {
369b7061124SArun Thomas 			INSERT(OCH_, conc);	/* offset is wrong */
370b7061124SArun Thomas 			prevfwd = conc;
371b7061124SArun Thomas 			prevback = conc;
372b7061124SArun Thomas 			first = 0;
373b7061124SArun Thomas 		}
374b7061124SArun Thomas 		ASTERN(OOR1, prevback);
375b7061124SArun Thomas 		prevback = THERE();
376b7061124SArun Thomas 		AHEAD(prevfwd);			/* fix previous offset */
377b7061124SArun Thomas 		prevfwd = HERE();
378b7061124SArun Thomas 		EMIT(OOR2, 0);			/* offset is very wrong */
379b7061124SArun Thomas 	}
380b7061124SArun Thomas 
381b7061124SArun Thomas 	if (!first) {		/* tail-end fixups */
382b7061124SArun Thomas 		AHEAD(prevfwd);
383b7061124SArun Thomas 		ASTERN(O_CH, prevback);
384b7061124SArun Thomas 	}
385b7061124SArun Thomas 
386b7061124SArun Thomas 	assert(!MORE() || SEE(stop));
387b7061124SArun Thomas }
388b7061124SArun Thomas 
389b7061124SArun Thomas /*
390b7061124SArun Thomas  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
391f14fb602SLionel Sambuc  == static void p_ere_exp(struct parse *p, size_t reclimit);
392b7061124SArun Thomas  */
393b7061124SArun Thomas static void
p_ere_exp(struct parse * p,size_t reclimit)3942fe8fb19SBen Gras p_ere_exp(
395f14fb602SLionel Sambuc     struct parse *p,
396f14fb602SLionel Sambuc     size_t reclimit)
397b7061124SArun Thomas {
3982fe8fb19SBen Gras 	char c;
3992fe8fb19SBen Gras 	sopno pos;
4002fe8fb19SBen Gras 	int count;
4012fe8fb19SBen Gras 	int count2;
4022fe8fb19SBen Gras 	sopno subno;
403b7061124SArun Thomas 	int wascaret = 0;
404b7061124SArun Thomas 
4052fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
4062fe8fb19SBen Gras 
407b7061124SArun Thomas 	assert(MORE());		/* caller should have ensured this */
408b7061124SArun Thomas 	c = GETNEXT();
409b7061124SArun Thomas 
410b7061124SArun Thomas 	pos = HERE();
411b7061124SArun Thomas 	switch (c) {
412b7061124SArun Thomas 	case '(':
413b7061124SArun Thomas 		REQUIRE(MORE(), REG_EPAREN);
414b7061124SArun Thomas 		p->g->nsub++;
415b7061124SArun Thomas 		subno = p->g->nsub;
416b7061124SArun Thomas 		if (subno < NPAREN)
417b7061124SArun Thomas 			p->pbegin[subno] = HERE();
418b7061124SArun Thomas 		EMIT(OLPAREN, subno);
419b7061124SArun Thomas 		if (!SEE(')'))
420f14fb602SLionel Sambuc 			p_ere(p, ')', reclimit);
421b7061124SArun Thomas 		if (subno < NPAREN) {
422b7061124SArun Thomas 			p->pend[subno] = HERE();
423b7061124SArun Thomas 			assert(p->pend[subno] != 0);
424b7061124SArun Thomas 		}
425b7061124SArun Thomas 		EMIT(ORPAREN, subno);
426b7061124SArun Thomas 		MUSTEAT(')', REG_EPAREN);
427b7061124SArun Thomas 		break;
428b7061124SArun Thomas #ifndef POSIX_MISTAKE
429b7061124SArun Thomas 	case ')':		/* happens only if no current unmatched ( */
430b7061124SArun Thomas 		/*
431b7061124SArun Thomas 		 * You may ask, why the ifndef?  Because I didn't notice
432b7061124SArun Thomas 		 * this until slightly too late for 1003.2, and none of the
433b7061124SArun Thomas 		 * other 1003.2 regular-expression reviewers noticed it at
434b7061124SArun Thomas 		 * all.  So an unmatched ) is legal POSIX, at least until
435b7061124SArun Thomas 		 * we can get it fixed.
436b7061124SArun Thomas 		 */
437b7061124SArun Thomas 		SETERROR(REG_EPAREN);
438b7061124SArun Thomas 		break;
439b7061124SArun Thomas #endif
440b7061124SArun Thomas 	case '^':
441b7061124SArun Thomas 		EMIT(OBOL, 0);
442b7061124SArun Thomas 		p->g->iflags |= USEBOL;
443b7061124SArun Thomas 		p->g->nbol++;
444b7061124SArun Thomas 		wascaret = 1;
445b7061124SArun Thomas 		break;
446b7061124SArun Thomas 	case '$':
447b7061124SArun Thomas 		EMIT(OEOL, 0);
448b7061124SArun Thomas 		p->g->iflags |= USEEOL;
449b7061124SArun Thomas 		p->g->neol++;
450b7061124SArun Thomas 		break;
451b7061124SArun Thomas 	case '|':
452b7061124SArun Thomas 		SETERROR(REG_EMPTY);
453b7061124SArun Thomas 		break;
454b7061124SArun Thomas 	case '*':
455b7061124SArun Thomas 	case '+':
456b7061124SArun Thomas 	case '?':
457b7061124SArun Thomas 		SETERROR(REG_BADRPT);
458b7061124SArun Thomas 		break;
459b7061124SArun Thomas 	case '.':
460b7061124SArun Thomas 		if (p->g->cflags&REG_NEWLINE)
461b7061124SArun Thomas 			nonnewline(p);
462b7061124SArun Thomas 		else
463b7061124SArun Thomas 			EMIT(OANY, 0);
464b7061124SArun Thomas 		break;
465b7061124SArun Thomas 	case '[':
466b7061124SArun Thomas 		p_bracket(p);
467b7061124SArun Thomas 		break;
468b7061124SArun Thomas 	case '\\':
469b7061124SArun Thomas 		REQUIRE(MORE(), REG_EESCAPE);
470b7061124SArun Thomas 		c = GETNEXT();
471b7061124SArun Thomas 		ordinary(p, c);
472b7061124SArun Thomas 		break;
473b7061124SArun Thomas 	case '{':		/* okay as ordinary except if digit follows */
4742fe8fb19SBen Gras 		REQUIRE(!MORE() || !isdigit((unsigned char)PEEK()), REG_BADRPT);
475b7061124SArun Thomas 		/* FALLTHROUGH */
476b7061124SArun Thomas 	default:
477b7061124SArun Thomas 		ordinary(p, c);
478b7061124SArun Thomas 		break;
479b7061124SArun Thomas 	}
480b7061124SArun Thomas 
481b7061124SArun Thomas 	if (!MORE())
482b7061124SArun Thomas 		return;
483b7061124SArun Thomas 	c = PEEK();
484b7061124SArun Thomas 	/* we call { a repetition if followed by a digit */
485b7061124SArun Thomas 	if (!( c == '*' || c == '+' || c == '?' ||
4862fe8fb19SBen Gras 	    (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) ))
487b7061124SArun Thomas 		return;		/* no repetition, we're done */
488b7061124SArun Thomas 	NEXT();
489b7061124SArun Thomas 
490b7061124SArun Thomas 	REQUIRE(!wascaret, REG_BADRPT);
491b7061124SArun Thomas 	switch (c) {
492b7061124SArun Thomas 	case '*':	/* implemented as +? */
493b7061124SArun Thomas 		/* this case does not require the (y|) trick, noKLUDGE */
494b7061124SArun Thomas 		INSERT(OPLUS_, pos);
495b7061124SArun Thomas 		ASTERN(O_PLUS, pos);
496b7061124SArun Thomas 		INSERT(OQUEST_, pos);
497b7061124SArun Thomas 		ASTERN(O_QUEST, pos);
498b7061124SArun Thomas 		break;
499b7061124SArun Thomas 	case '+':
500b7061124SArun Thomas 		INSERT(OPLUS_, pos);
501b7061124SArun Thomas 		ASTERN(O_PLUS, pos);
502b7061124SArun Thomas 		break;
503b7061124SArun Thomas 	case '?':
504b7061124SArun Thomas 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
505b7061124SArun Thomas 		INSERT(OCH_, pos);		/* offset slightly wrong */
506b7061124SArun Thomas 		ASTERN(OOR1, pos);		/* this one's right */
507b7061124SArun Thomas 		AHEAD(pos);			/* fix the OCH_ */
508b7061124SArun Thomas 		EMIT(OOR2, 0);			/* offset very wrong... */
509b7061124SArun Thomas 		AHEAD(THERE());			/* ...so fix it */
510b7061124SArun Thomas 		ASTERN(O_CH, THERETHERE());
511b7061124SArun Thomas 		break;
512b7061124SArun Thomas 	case '{':
513b7061124SArun Thomas 		count = p_count(p);
514b7061124SArun Thomas 		if (EAT(',')) {
5152fe8fb19SBen Gras 			if (isdigit((unsigned char)PEEK())) {
516b7061124SArun Thomas 				count2 = p_count(p);
517b7061124SArun Thomas 				REQUIRE(count <= count2, REG_BADBR);
518b7061124SArun Thomas 			} else		/* single number with comma */
519b7061124SArun Thomas 				count2 = INFINITY;
520b7061124SArun Thomas 		} else		/* just a single number */
521b7061124SArun Thomas 			count2 = count;
522f14fb602SLionel Sambuc 		repeat(p, pos, count, count2, 0);
523b7061124SArun Thomas 		if (!EAT('}')) {	/* error heuristics */
524b7061124SArun Thomas 			while (MORE() && PEEK() != '}')
525b7061124SArun Thomas 				NEXT();
526b7061124SArun Thomas 			REQUIRE(MORE(), REG_EBRACE);
527b7061124SArun Thomas 			SETERROR(REG_BADBR);
528b7061124SArun Thomas 		}
529b7061124SArun Thomas 		break;
530b7061124SArun Thomas 	}
531b7061124SArun Thomas 
532b7061124SArun Thomas 	if (!MORE())
533b7061124SArun Thomas 		return;
534b7061124SArun Thomas 	c = PEEK();
535b7061124SArun Thomas 	if (!( c == '*' || c == '+' || c == '?' ||
5362fe8fb19SBen Gras 	    (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) ) )
537b7061124SArun Thomas 		return;
538b7061124SArun Thomas 	SETERROR(REG_BADRPT);
539b7061124SArun Thomas }
540b7061124SArun Thomas 
541b7061124SArun Thomas /*
542b7061124SArun Thomas  - p_str - string (no metacharacters) "parser"
5432fe8fb19SBen Gras  == static void p_str(struct parse *p);
544b7061124SArun Thomas  */
545b7061124SArun Thomas static void
p_str(struct parse * p)5462fe8fb19SBen Gras p_str(
5472fe8fb19SBen Gras     struct parse *p)
548b7061124SArun Thomas {
5492fe8fb19SBen Gras 
5502fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
5512fe8fb19SBen Gras 
552b7061124SArun Thomas 	REQUIRE(MORE(), REG_EMPTY);
553b7061124SArun Thomas 	while (MORE())
554b7061124SArun Thomas 		ordinary(p, GETNEXT());
555b7061124SArun Thomas }
556b7061124SArun Thomas 
557b7061124SArun Thomas /*
558b7061124SArun Thomas  - p_bre - BRE parser top level, anchoring and concatenation
5592fe8fb19SBen Gras  == static void p_bre(struct parse *p, int end1, \
560f14fb602SLionel Sambuc  ==	int end2, size_t reclimit);
561b7061124SArun Thomas  * Giving end1 as OUT essentially eliminates the end1/end2 check.
562b7061124SArun Thomas  *
563b7061124SArun Thomas  * This implementation is a bit of a kludge, in that a trailing $ is first
564b7061124SArun Thomas  * taken as an ordinary character and then revised to be an anchor.  The
565b7061124SArun Thomas  * only undesirable side effect is that '$' gets included as a character
566b7061124SArun Thomas  * category in such cases.  This is fairly harmless; not worth fixing.
567b7061124SArun Thomas  * The amount of lookahead needed to avoid this kludge is excessive.
568b7061124SArun Thomas  */
569b7061124SArun Thomas static void
p_bre(struct parse * p,int end1,int end2,size_t reclimit)5702fe8fb19SBen Gras p_bre(
5712fe8fb19SBen Gras     struct parse *p,
5722fe8fb19SBen Gras     int end1,		/* first terminating character */
573f14fb602SLionel Sambuc     int end2,		/* second terminating character */
574f14fb602SLionel Sambuc     size_t reclimit)
575b7061124SArun Thomas {
5762fe8fb19SBen Gras 	sopno start;
5772fe8fb19SBen Gras 	int first = 1;			/* first subexpression? */
5782fe8fb19SBen Gras 	int wasdollar = 0;
5792fe8fb19SBen Gras 
5802fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
5812fe8fb19SBen Gras 
582f14fb602SLionel Sambuc 	if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
583f14fb602SLionel Sambuc 		p->error = REG_ESPACE;
584f14fb602SLionel Sambuc 		return;
585f14fb602SLionel Sambuc 	}
586f14fb602SLionel Sambuc 
5872fe8fb19SBen Gras 	start = HERE();
588b7061124SArun Thomas 
589b7061124SArun Thomas 	if (EAT('^')) {
590b7061124SArun Thomas 		EMIT(OBOL, 0);
591b7061124SArun Thomas 		p->g->iflags |= USEBOL;
592b7061124SArun Thomas 		p->g->nbol++;
593b7061124SArun Thomas 	}
594b7061124SArun Thomas 	while (MORE() && !SEETWO(end1, end2)) {
595f14fb602SLionel Sambuc 		wasdollar = p_simp_re(p, first, reclimit);
596b7061124SArun Thomas 		first = 0;
597b7061124SArun Thomas 	}
598b7061124SArun Thomas 	if (wasdollar) {	/* oops, that was a trailing anchor */
599b7061124SArun Thomas 		DROP(1);
600b7061124SArun Thomas 		EMIT(OEOL, 0);
601b7061124SArun Thomas 		p->g->iflags |= USEEOL;
602b7061124SArun Thomas 		p->g->neol++;
603b7061124SArun Thomas 	}
604b7061124SArun Thomas 
605b7061124SArun Thomas 	REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
606b7061124SArun Thomas }
607b7061124SArun Thomas 
608b7061124SArun Thomas /*
609b7061124SArun Thomas  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
610f14fb602SLionel Sambuc  == static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
611b7061124SArun Thomas  */
612b7061124SArun Thomas static int			/* was the simple RE an unbackslashed $? */
p_simp_re(struct parse * p,int starordinary,size_t reclimit)6132fe8fb19SBen Gras p_simp_re(
6142fe8fb19SBen Gras     struct parse *p,
615f14fb602SLionel Sambuc     int starordinary,		/* is a leading * an ordinary character? */
616f14fb602SLionel Sambuc     size_t reclimit)
617b7061124SArun Thomas {
6182fe8fb19SBen Gras 	int c;
6192fe8fb19SBen Gras 	int count;
6202fe8fb19SBen Gras 	int count2;
621f14fb602SLionel Sambuc 	sopno pos, i;
6222fe8fb19SBen Gras 	sopno subno;
623b7061124SArun Thomas #	define	BACKSL	(1<<CHAR_BIT)
624b7061124SArun Thomas 
6252fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
6262fe8fb19SBen Gras 
627b7061124SArun Thomas 	pos = HERE();		/* repetion op, if any, covers from here */
628b7061124SArun Thomas 
629b7061124SArun Thomas 	assert(MORE());		/* caller should have ensured this */
630b7061124SArun Thomas 	c = GETNEXT();
631b7061124SArun Thomas 	if (c == '\\') {
632b7061124SArun Thomas 		REQUIRE(MORE(), REG_EESCAPE);
633b7061124SArun Thomas 		c = BACKSL | (unsigned char)GETNEXT();
634b7061124SArun Thomas 	}
635b7061124SArun Thomas 	switch (c) {
636b7061124SArun Thomas 	case '.':
637b7061124SArun Thomas 		if (p->g->cflags&REG_NEWLINE)
638b7061124SArun Thomas 			nonnewline(p);
639b7061124SArun Thomas 		else
640b7061124SArun Thomas 			EMIT(OANY, 0);
641b7061124SArun Thomas 		break;
642b7061124SArun Thomas 	case '[':
643b7061124SArun Thomas 		p_bracket(p);
644b7061124SArun Thomas 		break;
645b7061124SArun Thomas 	case BACKSL|'{':
646b7061124SArun Thomas 		SETERROR(REG_BADRPT);
647b7061124SArun Thomas 		break;
648b7061124SArun Thomas 	case BACKSL|'(':
649b7061124SArun Thomas 		p->g->nsub++;
650b7061124SArun Thomas 		subno = p->g->nsub;
651b7061124SArun Thomas 		if (subno < NPAREN)
652b7061124SArun Thomas 			p->pbegin[subno] = HERE();
653b7061124SArun Thomas 		EMIT(OLPAREN, subno);
654b7061124SArun Thomas 		/* the MORE here is an error heuristic */
655b7061124SArun Thomas 		if (MORE() && !SEETWO('\\', ')'))
656f14fb602SLionel Sambuc 			p_bre(p, '\\', ')', reclimit);
657b7061124SArun Thomas 		if (subno < NPAREN) {
658b7061124SArun Thomas 			p->pend[subno] = HERE();
659b7061124SArun Thomas 			assert(p->pend[subno] != 0);
660b7061124SArun Thomas 		}
661b7061124SArun Thomas 		EMIT(ORPAREN, subno);
662b7061124SArun Thomas 		REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
663b7061124SArun Thomas 		break;
664b7061124SArun Thomas 	case BACKSL|')':	/* should not get here -- must be user */
665b7061124SArun Thomas 	case BACKSL|'}':
666b7061124SArun Thomas 		SETERROR(REG_EPAREN);
667b7061124SArun Thomas 		break;
668b7061124SArun Thomas 	case BACKSL|'1':
669b7061124SArun Thomas 	case BACKSL|'2':
670b7061124SArun Thomas 	case BACKSL|'3':
671b7061124SArun Thomas 	case BACKSL|'4':
672b7061124SArun Thomas 	case BACKSL|'5':
673b7061124SArun Thomas 	case BACKSL|'6':
674b7061124SArun Thomas 	case BACKSL|'7':
675b7061124SArun Thomas 	case BACKSL|'8':
676b7061124SArun Thomas 	case BACKSL|'9':
677b7061124SArun Thomas 		i = (c&~BACKSL) - '0';
678b7061124SArun Thomas 		assert(i < NPAREN);
679b7061124SArun Thomas 		if (p->pend[i] != 0) {
680b7061124SArun Thomas 			assert(i <= p->g->nsub);
681b7061124SArun Thomas 			EMIT(OBACK_, i);
682b7061124SArun Thomas 			assert(p->pbegin[i] != 0);
683b7061124SArun Thomas 			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
684b7061124SArun Thomas 			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
685b7061124SArun Thomas 			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
686b7061124SArun Thomas 			EMIT(O_BACK, i);
687b7061124SArun Thomas 		} else
688b7061124SArun Thomas 			SETERROR(REG_ESUBREG);
689b7061124SArun Thomas 		p->g->backrefs = 1;
690b7061124SArun Thomas 		break;
691b7061124SArun Thomas 	case '*':
692b7061124SArun Thomas 		REQUIRE(starordinary, REG_BADRPT);
693b7061124SArun Thomas 		/* FALLTHROUGH */
694b7061124SArun Thomas 	default:
695b7061124SArun Thomas 		ordinary(p, c &~ BACKSL);
696b7061124SArun Thomas 		break;
697b7061124SArun Thomas 	}
698b7061124SArun Thomas 
699b7061124SArun Thomas 	if (EAT('*')) {		/* implemented as +? */
700b7061124SArun Thomas 		/* this case does not require the (y|) trick, noKLUDGE */
701b7061124SArun Thomas 		INSERT(OPLUS_, pos);
702b7061124SArun Thomas 		ASTERN(O_PLUS, pos);
703b7061124SArun Thomas 		INSERT(OQUEST_, pos);
704b7061124SArun Thomas 		ASTERN(O_QUEST, pos);
705b7061124SArun Thomas 	} else if (EATTWO('\\', '{')) {
706b7061124SArun Thomas 		count = p_count(p);
707b7061124SArun Thomas 		if (EAT(',')) {
7082fe8fb19SBen Gras 			if (MORE() && isdigit((unsigned char)PEEK())) {
709b7061124SArun Thomas 				count2 = p_count(p);
710b7061124SArun Thomas 				REQUIRE(count <= count2, REG_BADBR);
711b7061124SArun Thomas 			} else		/* single number with comma */
712b7061124SArun Thomas 				count2 = INFINITY;
713b7061124SArun Thomas 		} else		/* just a single number */
714b7061124SArun Thomas 			count2 = count;
715f14fb602SLionel Sambuc 		repeat(p, pos, count, count2, 0);
716b7061124SArun Thomas 		if (!EATTWO('\\', '}')) {	/* error heuristics */
717b7061124SArun Thomas 			while (MORE() && !SEETWO('\\', '}'))
718b7061124SArun Thomas 				NEXT();
719b7061124SArun Thomas 			REQUIRE(MORE(), REG_EBRACE);
720b7061124SArun Thomas 			SETERROR(REG_BADBR);
721b7061124SArun Thomas 		}
722b7061124SArun Thomas 	} else if (c == (unsigned char)'$')	/* $ (but not \$) ends it */
723b7061124SArun Thomas 		return(1);
724b7061124SArun Thomas 
725b7061124SArun Thomas 	return(0);
726b7061124SArun Thomas }
727b7061124SArun Thomas 
728b7061124SArun Thomas /*
729b7061124SArun Thomas  - p_count - parse a repetition count
7302fe8fb19SBen Gras  == static int p_count(struct parse *p);
731b7061124SArun Thomas  */
732b7061124SArun Thomas static int			/* the value */
p_count(struct parse * p)7332fe8fb19SBen Gras p_count(
7342fe8fb19SBen Gras     struct parse *p)
735b7061124SArun Thomas {
7362fe8fb19SBen Gras 	int count = 0;
7372fe8fb19SBen Gras 	int ndigits = 0;
738b7061124SArun Thomas 
7392fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
7402fe8fb19SBen Gras 
7412fe8fb19SBen Gras 	while (MORE() && isdigit((unsigned char)PEEK()) && count <= DUPMAX) {
742b7061124SArun Thomas 		count = count*10 + (GETNEXT() - '0');
743b7061124SArun Thomas 		ndigits++;
744b7061124SArun Thomas 	}
745b7061124SArun Thomas 
746b7061124SArun Thomas 	REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
747b7061124SArun Thomas 	return(count);
748b7061124SArun Thomas }
749b7061124SArun Thomas 
750b7061124SArun Thomas /*
751b7061124SArun Thomas  - p_bracket - parse a bracketed character list
7522fe8fb19SBen Gras  == static void p_bracket(struct parse *p);
753b7061124SArun Thomas  *
754b7061124SArun Thomas  * Note a significant property of this code:  if the allocset() did SETERROR,
755b7061124SArun Thomas  * no set operations are done.
756b7061124SArun Thomas  */
757b7061124SArun Thomas static void
p_bracket(struct parse * p)7582fe8fb19SBen Gras p_bracket(
7592fe8fb19SBen Gras     struct parse *p)
760b7061124SArun Thomas {
7612fe8fb19SBen Gras 	cset *cs;
7622fe8fb19SBen Gras 	int invert = 0;
7632fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
7642fe8fb19SBen Gras 
7652fe8fb19SBen Gras 	cs = allocset(p);
766f14fb602SLionel Sambuc 	if (cs == NULL)
767f14fb602SLionel Sambuc 		return;
768b7061124SArun Thomas 
769b7061124SArun Thomas 	/* Dept of Truly Sickening Special-Case Kludges */
7702fe8fb19SBen Gras 	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]",
7712fe8fb19SBen Gras 					    (size_t)6) == 0) {
772b7061124SArun Thomas 		EMIT(OBOW, 0);
773b7061124SArun Thomas 		NEXTn(6);
774b7061124SArun Thomas 		return;
775b7061124SArun Thomas 	}
7762fe8fb19SBen Gras 	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]",
7772fe8fb19SBen Gras 					    (size_t)6) == 0) {
778b7061124SArun Thomas 		EMIT(OEOW, 0);
779b7061124SArun Thomas 		NEXTn(6);
780b7061124SArun Thomas 		return;
781b7061124SArun Thomas 	}
782b7061124SArun Thomas 
783b7061124SArun Thomas 	if (EAT('^'))
784b7061124SArun Thomas 		invert++;	/* make note to invert set at end */
785b7061124SArun Thomas 	if (EAT(']'))
786b7061124SArun Thomas 		CHadd(cs, ']');
787b7061124SArun Thomas 	else if (EAT('-'))
788b7061124SArun Thomas 		CHadd(cs, '-');
789b7061124SArun Thomas 	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
790b7061124SArun Thomas 		p_b_term(p, cs);
791b7061124SArun Thomas 	if (EAT('-'))
792b7061124SArun Thomas 		CHadd(cs, '-');
793b7061124SArun Thomas 	MUSTEAT(']', REG_EBRACK);
794b7061124SArun Thomas 
795b7061124SArun Thomas 	if (p->error != 0)	/* don't mess things up further */
796b7061124SArun Thomas 		return;
797b7061124SArun Thomas 
798b7061124SArun Thomas 	if (p->g->cflags&REG_ICASE) {
799f14fb602SLionel Sambuc 		ssize_t i;
8002fe8fb19SBen Gras 		int ci;
801b7061124SArun Thomas 
802b7061124SArun Thomas 		for (i = p->g->csetsize - 1; i >= 0; i--)
803b7061124SArun Thomas 			if (CHIN(cs, i) && isalpha(i)) {
804f14fb602SLionel Sambuc 				ci = othercase((int)i);
805b7061124SArun Thomas 				if (ci != i)
806b7061124SArun Thomas 					CHadd(cs, ci);
807b7061124SArun Thomas 			}
808b7061124SArun Thomas 		if (cs->multis != NULL)
809b7061124SArun Thomas 			mccase(p, cs);
810b7061124SArun Thomas 	}
811b7061124SArun Thomas 	if (invert) {
812f14fb602SLionel Sambuc 		ssize_t i;
813b7061124SArun Thomas 
814b7061124SArun Thomas 		for (i = p->g->csetsize - 1; i >= 0; i--)
815b7061124SArun Thomas 			if (CHIN(cs, i))
816f14fb602SLionel Sambuc 				CHsub(cs, (int)i);
817b7061124SArun Thomas 			else
818f14fb602SLionel Sambuc 				CHadd(cs, (int)i);
819b7061124SArun Thomas 		if (p->g->cflags&REG_NEWLINE)
820b7061124SArun Thomas 			CHsub(cs, '\n');
821b7061124SArun Thomas 		if (cs->multis != NULL)
822b7061124SArun Thomas 			mcinvert(p, cs);
823b7061124SArun Thomas 	}
824b7061124SArun Thomas 
825b7061124SArun Thomas 	assert(cs->multis == NULL);		/* xxx */
826b7061124SArun Thomas 
827b7061124SArun Thomas 	if (nch(p, cs) == 1) {		/* optimize singleton sets */
828b7061124SArun Thomas 		ordinary(p, firstch(p, cs));
829b7061124SArun Thomas 		freeset(p, cs);
830b7061124SArun Thomas 	} else
831b7061124SArun Thomas 		EMIT(OANYOF, freezeset(p, cs));
832b7061124SArun Thomas }
833b7061124SArun Thomas 
834b7061124SArun Thomas /*
835b7061124SArun Thomas  - p_b_term - parse one term of a bracketed character list
8362fe8fb19SBen Gras  == static void p_b_term(struct parse *p, cset *cs);
837b7061124SArun Thomas  */
838b7061124SArun Thomas static void
p_b_term(struct parse * p,cset * cs)8392fe8fb19SBen Gras p_b_term(
8402fe8fb19SBen Gras     struct parse *p,
8412fe8fb19SBen Gras     cset *cs)
842b7061124SArun Thomas {
8432fe8fb19SBen Gras 	char c;
8442fe8fb19SBen Gras 	char start, finish;
8452fe8fb19SBen Gras 	int i;
8462fe8fb19SBen Gras 
8472fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
8482fe8fb19SBen Gras 	_DIAGASSERT(cs != NULL);
849b7061124SArun Thomas 
850b7061124SArun Thomas 	/* classify what we've got */
851b7061124SArun Thomas 	switch ((MORE()) ? PEEK() : '\0') {
852b7061124SArun Thomas 	case '[':
853b7061124SArun Thomas 		c = (MORE2()) ? PEEK2() : '\0';
854b7061124SArun Thomas 		break;
8552fe8fb19SBen Gras 
856b7061124SArun Thomas 	case '-':
857b7061124SArun Thomas 		SETERROR(REG_ERANGE);
858b7061124SArun Thomas 		return;			/* NOTE RETURN */
8592fe8fb19SBen Gras 
860b7061124SArun Thomas 	default:
861b7061124SArun Thomas 		c = '\0';
862b7061124SArun Thomas 		break;
863b7061124SArun Thomas 	}
864b7061124SArun Thomas 
865b7061124SArun Thomas 	switch (c) {
866b7061124SArun Thomas 	case ':':		/* character class */
867b7061124SArun Thomas 		NEXT2();
868b7061124SArun Thomas 		REQUIRE(MORE(), REG_EBRACK);
869b7061124SArun Thomas 		c = PEEK();
870b7061124SArun Thomas 		REQUIRE(c != '-' && c != ']', REG_ECTYPE);
871b7061124SArun Thomas 		p_b_cclass(p, cs);
872b7061124SArun Thomas 		REQUIRE(MORE(), REG_EBRACK);
873b7061124SArun Thomas 		REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
874b7061124SArun Thomas 		break;
875b7061124SArun Thomas 	case '=':		/* equivalence class */
876b7061124SArun Thomas 		NEXT2();
877b7061124SArun Thomas 		REQUIRE(MORE(), REG_EBRACK);
878b7061124SArun Thomas 		c = PEEK();
879b7061124SArun Thomas 		REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
880b7061124SArun Thomas 		p_b_eclass(p, cs);
881b7061124SArun Thomas 		REQUIRE(MORE(), REG_EBRACK);
882b7061124SArun Thomas 		REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
883b7061124SArun Thomas 		break;
884b7061124SArun Thomas 	default:		/* symbol, ordinary character, or range */
885b7061124SArun Thomas /* xxx revision needed for multichar stuff */
886b7061124SArun Thomas 		start = p_b_symbol(p);
887b7061124SArun Thomas 		if (SEE('-') && MORE2() && PEEK2() != ']') {
888b7061124SArun Thomas 			/* range */
889b7061124SArun Thomas 			NEXT();
890b7061124SArun Thomas 			if (EAT('-'))
891b7061124SArun Thomas 				finish = '-';
892b7061124SArun Thomas 			else
893b7061124SArun Thomas 				finish = p_b_symbol(p);
894b7061124SArun Thomas 		} else
895b7061124SArun Thomas 			finish = start;
896b7061124SArun Thomas /* xxx what about signed chars here... */
897b7061124SArun Thomas 		REQUIRE(start <= finish, REG_ERANGE);
898b7061124SArun Thomas 		for (i = start; i <= finish; i++)
899b7061124SArun Thomas 			CHadd(cs, i);
900b7061124SArun Thomas 		break;
901b7061124SArun Thomas 	}
902b7061124SArun Thomas }
903b7061124SArun Thomas 
904b7061124SArun Thomas /*
905b7061124SArun Thomas  - p_b_cclass - parse a character-class name and deal with it
9062fe8fb19SBen Gras  == static void p_b_cclass(struct parse *p, cset *cs);
907b7061124SArun Thomas  */
908b7061124SArun Thomas static void
p_b_cclass(struct parse * p,cset * cs)9092fe8fb19SBen Gras p_b_cclass(
9102fe8fb19SBen Gras     struct parse *p,
9112fe8fb19SBen Gras     cset *cs)
912b7061124SArun Thomas {
9132fe8fb19SBen Gras 	const char *sp;
9142fe8fb19SBen Gras 	const struct cclass *cp;
9152fe8fb19SBen Gras 	size_t len;
9162fe8fb19SBen Gras 	const char *u;
9172fe8fb19SBen Gras 	char c;
918b7061124SArun Thomas 
9192fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
9202fe8fb19SBen Gras 	_DIAGASSERT(cs != NULL);
9212fe8fb19SBen Gras 
9222fe8fb19SBen Gras 	sp = p->next;
9232fe8fb19SBen Gras 
9242fe8fb19SBen Gras 	while (MORE() && isalpha((unsigned char)PEEK()))
925b7061124SArun Thomas 		NEXT();
926b7061124SArun Thomas 	len = p->next - sp;
927b7061124SArun Thomas 	for (cp = cclasses; cp->name != NULL; cp++)
928b7061124SArun Thomas 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
929b7061124SArun Thomas 			break;
930b7061124SArun Thomas 	if (cp->name == NULL) {
931b7061124SArun Thomas 		/* oops, didn't find it */
932b7061124SArun Thomas 		SETERROR(REG_ECTYPE);
933b7061124SArun Thomas 		return;
934b7061124SArun Thomas 	}
935b7061124SArun Thomas 
936b7061124SArun Thomas 	u = cp->chars;
937b7061124SArun Thomas 	while ((c = *u++) != '\0')
938b7061124SArun Thomas 		CHadd(cs, c);
939b7061124SArun Thomas 	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
940b7061124SArun Thomas 		MCadd(p, cs, u);
941b7061124SArun Thomas }
942b7061124SArun Thomas 
943b7061124SArun Thomas /*
944b7061124SArun Thomas  - p_b_eclass - parse an equivalence-class name and deal with it
9452fe8fb19SBen Gras  == static void p_b_eclass(struct parse *p, cset *cs);
946b7061124SArun Thomas  *
947b7061124SArun Thomas  * This implementation is incomplete. xxx
948b7061124SArun Thomas  */
949b7061124SArun Thomas static void
p_b_eclass(struct parse * p,cset * cs)9502fe8fb19SBen Gras p_b_eclass(
9512fe8fb19SBen Gras     struct parse *p,
9522fe8fb19SBen Gras     cset *cs)
953b7061124SArun Thomas {
9542fe8fb19SBen Gras 	char c;
9552fe8fb19SBen Gras 
9562fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
9572fe8fb19SBen Gras 	_DIAGASSERT(cs != NULL);
958b7061124SArun Thomas 
959b7061124SArun Thomas 	c = p_b_coll_elem(p, '=');
960b7061124SArun Thomas 	CHadd(cs, c);
961b7061124SArun Thomas }
962b7061124SArun Thomas 
963b7061124SArun Thomas /*
964b7061124SArun Thomas  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
9652fe8fb19SBen Gras  == static char p_b_symbol(struct parse *p);
966b7061124SArun Thomas  */
967b7061124SArun Thomas static char			/* value of symbol */
p_b_symbol(struct parse * p)9682fe8fb19SBen Gras p_b_symbol(
9692fe8fb19SBen Gras     struct parse *p)
970b7061124SArun Thomas {
9712fe8fb19SBen Gras 	char value;
9722fe8fb19SBen Gras 
9732fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
974b7061124SArun Thomas 
975b7061124SArun Thomas 	REQUIRE(MORE(), REG_EBRACK);
976b7061124SArun Thomas 	if (!EATTWO('[', '.'))
977b7061124SArun Thomas 		return(GETNEXT());
978b7061124SArun Thomas 
979b7061124SArun Thomas 	/* collating symbol */
980b7061124SArun Thomas 	value = p_b_coll_elem(p, '.');
981b7061124SArun Thomas 	REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
982b7061124SArun Thomas 	return(value);
983b7061124SArun Thomas }
984b7061124SArun Thomas 
985b7061124SArun Thomas /*
986b7061124SArun Thomas  - p_b_coll_elem - parse a collating-element name and look it up
9872fe8fb19SBen Gras  == static char p_b_coll_elem(struct parse *p, int endc);
988b7061124SArun Thomas  */
989b7061124SArun Thomas static char			/* value of collating element */
p_b_coll_elem(struct parse * p,int endc)9902fe8fb19SBen Gras p_b_coll_elem(
9912fe8fb19SBen Gras     struct parse *p,
9922fe8fb19SBen Gras     int endc)			/* name ended by endc,']' */
993b7061124SArun Thomas {
9942fe8fb19SBen Gras 	const char *sp;
9952fe8fb19SBen Gras 	const struct cname *cp;
9962fe8fb19SBen Gras 	size_t len;
9972fe8fb19SBen Gras 
9982fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
9992fe8fb19SBen Gras 
10002fe8fb19SBen Gras 	sp = p->next;
1001b7061124SArun Thomas 
1002b7061124SArun Thomas 	while (MORE() && !SEETWO(endc, ']'))
1003b7061124SArun Thomas 		NEXT();
1004b7061124SArun Thomas 	if (!MORE()) {
1005b7061124SArun Thomas 		SETERROR(REG_EBRACK);
1006b7061124SArun Thomas 		return(0);
1007b7061124SArun Thomas 	}
1008b7061124SArun Thomas 	len = p->next - sp;
1009b7061124SArun Thomas 	for (cp = cnames; cp->name != NULL; cp++)
1010b7061124SArun Thomas 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
1011b7061124SArun Thomas 			return(cp->code);	/* known name */
1012b7061124SArun Thomas 	if (len == 1)
1013b7061124SArun Thomas 		return(*sp);	/* single character */
1014b7061124SArun Thomas 	SETERROR(REG_ECOLLATE);			/* neither */
1015b7061124SArun Thomas 	return(0);
1016b7061124SArun Thomas }
1017b7061124SArun Thomas 
1018b7061124SArun Thomas /*
1019b7061124SArun Thomas  - othercase - return the case counterpart of an alphabetic
10202fe8fb19SBen Gras  == static int othercase(int ch);
1021b7061124SArun Thomas  */
10222fe8fb19SBen Gras static int			/* if no counterpart, return ch */
othercase(int ch)10232fe8fb19SBen Gras othercase(
10242fe8fb19SBen Gras     int ch)
1025b7061124SArun Thomas {
1026b7061124SArun Thomas 	assert(isalpha(ch));
1027b7061124SArun Thomas 	if (isupper(ch))
1028b7061124SArun Thomas 		return(tolower(ch));
1029b7061124SArun Thomas 	else if (islower(ch))
1030b7061124SArun Thomas 		return(toupper(ch));
1031b7061124SArun Thomas 	else			/* peculiar, but could happen */
1032b7061124SArun Thomas 		return(ch);
1033b7061124SArun Thomas }
1034b7061124SArun Thomas 
1035b7061124SArun Thomas /*
1036b7061124SArun Thomas  - bothcases - emit a dualcase version of a two-case character
10372fe8fb19SBen Gras  == static void bothcases(struct parse *p, int ch);
1038b7061124SArun Thomas  *
1039b7061124SArun Thomas  * Boy, is this implementation ever a kludge...
1040b7061124SArun Thomas  */
1041b7061124SArun Thomas static void
bothcases(struct parse * p,int ch)10422fe8fb19SBen Gras bothcases(
10432fe8fb19SBen Gras     struct parse *p,
10442fe8fb19SBen Gras     int ch)
1045b7061124SArun Thomas {
10462fe8fb19SBen Gras 	const char *oldnext;
10472fe8fb19SBen Gras 	const char *oldend;
1048b7061124SArun Thomas 	char bracket[3];
1049b7061124SArun Thomas 
10502fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
10512fe8fb19SBen Gras 
10522fe8fb19SBen Gras 	oldnext = p->next;
10532fe8fb19SBen Gras 	oldend = p->end;
10542fe8fb19SBen Gras 
1055b7061124SArun Thomas 	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
1056b7061124SArun Thomas 	p->next = bracket;
1057b7061124SArun Thomas 	p->end = bracket+2;
1058b7061124SArun Thomas 	bracket[0] = ch;
1059b7061124SArun Thomas 	bracket[1] = ']';
1060b7061124SArun Thomas 	bracket[2] = '\0';
1061b7061124SArun Thomas 	p_bracket(p);
1062b7061124SArun Thomas 	assert(p->next == bracket+2);
1063b7061124SArun Thomas 	p->next = oldnext;
1064b7061124SArun Thomas 	p->end = oldend;
1065b7061124SArun Thomas }
1066b7061124SArun Thomas 
1067b7061124SArun Thomas /*
1068b7061124SArun Thomas  - ordinary - emit an ordinary character
10692fe8fb19SBen Gras  == static void ordinary(struct parse *p, int ch);
1070b7061124SArun Thomas  */
1071b7061124SArun Thomas static void
ordinary(struct parse * p,int ch)10722fe8fb19SBen Gras ordinary(
10732fe8fb19SBen Gras     struct parse *p,
10742fe8fb19SBen Gras     int ch)
1075b7061124SArun Thomas {
10762fe8fb19SBen Gras 	cat_t *cap;
1077*0a6a1f1dSLionel Sambuc 	unsigned char uc = (unsigned char)ch;
1078b7061124SArun Thomas 
10792fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
10802fe8fb19SBen Gras 
10812fe8fb19SBen Gras 	cap = p->g->categories;
1082*0a6a1f1dSLionel Sambuc 	if ((p->g->cflags & REG_ICASE) && isalpha(uc) && othercase(uc) != uc)
1083*0a6a1f1dSLionel Sambuc 		bothcases(p, uc);
1084b7061124SArun Thomas 	else {
1085*0a6a1f1dSLionel Sambuc 		EMIT(OCHAR, (sopno)uc);
1086*0a6a1f1dSLionel Sambuc 		if (cap[uc] == 0) {
1087f14fb602SLionel Sambuc 			_DIAGASSERT(__type_fit(unsigned char,
1088f14fb602SLionel Sambuc 			    p->g->ncategories + 1));
1089*0a6a1f1dSLionel Sambuc 			cap[uc] = (unsigned char)p->g->ncategories++;
1090f14fb602SLionel Sambuc 		}
1091b7061124SArun Thomas 	}
1092b7061124SArun Thomas }
1093b7061124SArun Thomas 
1094b7061124SArun Thomas /*
1095b7061124SArun Thomas  - nonnewline - emit REG_NEWLINE version of OANY
10962fe8fb19SBen Gras  == static void nonnewline(struct parse *p);
1097b7061124SArun Thomas  *
1098b7061124SArun Thomas  * Boy, is this implementation ever a kludge...
1099b7061124SArun Thomas  */
1100b7061124SArun Thomas static void
nonnewline(struct parse * p)11012fe8fb19SBen Gras nonnewline(
11022fe8fb19SBen Gras     struct parse *p)
1103b7061124SArun Thomas {
11042fe8fb19SBen Gras 	const char *oldnext;
11052fe8fb19SBen Gras 	const char *oldend;
1106b7061124SArun Thomas 	char bracket[4];
1107b7061124SArun Thomas 
11082fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
11092fe8fb19SBen Gras 
11102fe8fb19SBen Gras 	oldnext = p->next;
11112fe8fb19SBen Gras 	oldend = p->end;
11122fe8fb19SBen Gras 
1113b7061124SArun Thomas 	p->next = bracket;
1114b7061124SArun Thomas 	p->end = bracket+3;
1115b7061124SArun Thomas 	bracket[0] = '^';
1116b7061124SArun Thomas 	bracket[1] = '\n';
1117b7061124SArun Thomas 	bracket[2] = ']';
1118b7061124SArun Thomas 	bracket[3] = '\0';
1119b7061124SArun Thomas 	p_bracket(p);
1120b7061124SArun Thomas 	assert(p->next == bracket+3);
1121b7061124SArun Thomas 	p->next = oldnext;
1122b7061124SArun Thomas 	p->end = oldend;
1123b7061124SArun Thomas }
1124b7061124SArun Thomas 
1125b7061124SArun Thomas /*
1126b7061124SArun Thomas  - repeat - generate code for a bounded repetition, recursively if needed
1127f14fb602SLionel Sambuc  == static void repeat(struct parse *p, sopno start, int from, int to,
1128f14fb602SLionel Sambuc  == size_t reclimit);
1129b7061124SArun Thomas  */
1130b7061124SArun Thomas static void
repeat(struct parse * p,sopno start,int from,int to,size_t reclimit)11312fe8fb19SBen Gras repeat(
11322fe8fb19SBen Gras     struct parse *p,
11332fe8fb19SBen Gras     sopno start,		/* operand from here to end of strip */
11342fe8fb19SBen Gras     int from,			/* repeated from this number */
1135f14fb602SLionel Sambuc     int to,			/* to this number of times (maybe INFINITY) */
1136f14fb602SLionel Sambuc     size_t reclimit)
1137b7061124SArun Thomas {
11382fe8fb19SBen Gras 	sopno finish;
1139b7061124SArun Thomas #	define	N	2
1140b7061124SArun Thomas #	define	INF	3
1141b7061124SArun Thomas #	define	REP(f, t)	((f)*8 + (t))
1142b7061124SArun Thomas #	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
11432fe8fb19SBen Gras 	sopno copy;
11442fe8fb19SBen Gras 
11452fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
11462fe8fb19SBen Gras 
1147f14fb602SLionel Sambuc 	if (reclimit++ > RECLIMIT)
1148f14fb602SLionel Sambuc 		p->error = REG_ESPACE;
1149f14fb602SLionel Sambuc 	if (p->error)
1150b7061124SArun Thomas 		return;
1151b7061124SArun Thomas 
1152f14fb602SLionel Sambuc 	finish = HERE();
1153f14fb602SLionel Sambuc 
1154b7061124SArun Thomas 	assert(from <= to);
1155b7061124SArun Thomas 
1156b7061124SArun Thomas 	switch (REP(MAP(from), MAP(to))) {
1157b7061124SArun Thomas 	case REP(0, 0):			/* must be user doing this */
1158b7061124SArun Thomas 		DROP(finish-start);	/* drop the operand */
1159b7061124SArun Thomas 		break;
1160b7061124SArun Thomas 	case REP(0, 1):			/* as x{1,1}? */
1161b7061124SArun Thomas 	case REP(0, N):			/* as x{1,n}? */
1162b7061124SArun Thomas 	case REP(0, INF):		/* as x{1,}? */
1163b7061124SArun Thomas 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1164b7061124SArun Thomas 		INSERT(OCH_, start);		/* offset is wrong... */
1165f14fb602SLionel Sambuc 		repeat(p, start+1, 1, to, reclimit);
1166b7061124SArun Thomas 		ASTERN(OOR1, start);
1167b7061124SArun Thomas 		AHEAD(start);			/* ... fix it */
1168b7061124SArun Thomas 		EMIT(OOR2, 0);
1169b7061124SArun Thomas 		AHEAD(THERE());
1170b7061124SArun Thomas 		ASTERN(O_CH, THERETHERE());
1171b7061124SArun Thomas 		break;
1172b7061124SArun Thomas 	case REP(1, 1):			/* trivial case */
1173b7061124SArun Thomas 		/* done */
1174b7061124SArun Thomas 		break;
1175b7061124SArun Thomas 	case REP(1, N):			/* as x?x{1,n-1} */
1176b7061124SArun Thomas 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1177b7061124SArun Thomas 		INSERT(OCH_, start);
1178b7061124SArun Thomas 		ASTERN(OOR1, start);
1179b7061124SArun Thomas 		AHEAD(start);
1180b7061124SArun Thomas 		EMIT(OOR2, 0);			/* offset very wrong... */
1181b7061124SArun Thomas 		AHEAD(THERE());			/* ...so fix it */
1182b7061124SArun Thomas 		ASTERN(O_CH, THERETHERE());
1183b7061124SArun Thomas 		copy = dupl(p, start+1, finish+1);
1184b7061124SArun Thomas 		assert(copy == finish+4);
1185f14fb602SLionel Sambuc 		repeat(p, copy, 1, to-1, reclimit);
1186b7061124SArun Thomas 		break;
1187b7061124SArun Thomas 	case REP(1, INF):		/* as x+ */
1188b7061124SArun Thomas 		INSERT(OPLUS_, start);
1189b7061124SArun Thomas 		ASTERN(O_PLUS, start);
1190b7061124SArun Thomas 		break;
1191b7061124SArun Thomas 	case REP(N, N):			/* as xx{m-1,n-1} */
1192b7061124SArun Thomas 		copy = dupl(p, start, finish);
1193f14fb602SLionel Sambuc 		repeat(p, copy, from-1, to-1, reclimit);
1194b7061124SArun Thomas 		break;
1195b7061124SArun Thomas 	case REP(N, INF):		/* as xx{n-1,INF} */
1196b7061124SArun Thomas 		copy = dupl(p, start, finish);
1197f14fb602SLionel Sambuc 		repeat(p, copy, from-1, to, reclimit);
1198b7061124SArun Thomas 		break;
1199b7061124SArun Thomas 	default:			/* "can't happen" */
1200b7061124SArun Thomas 		SETERROR(REG_ASSERT);	/* just in case */
1201b7061124SArun Thomas 		break;
1202b7061124SArun Thomas 	}
1203b7061124SArun Thomas }
1204b7061124SArun Thomas 
1205b7061124SArun Thomas /*
1206b7061124SArun Thomas  - seterr - set an error condition
12072fe8fb19SBen Gras  == static int seterr(struct parse *p, int e);
1208b7061124SArun Thomas  */
1209b7061124SArun Thomas static int			/* useless but makes type checking happy */
seterr(struct parse * p,int e)12102fe8fb19SBen Gras seterr(
12112fe8fb19SBen Gras     struct parse *p,
12122fe8fb19SBen Gras     int e)
1213b7061124SArun Thomas {
12142fe8fb19SBen Gras 
12152fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
12162fe8fb19SBen Gras 
1217b7061124SArun Thomas 	if (p->error == 0)	/* keep earliest error condition */
1218b7061124SArun Thomas 		p->error = e;
1219b7061124SArun Thomas 	p->next = nuls;		/* try to bring things to a halt */
1220b7061124SArun Thomas 	p->end = nuls;
1221b7061124SArun Thomas 	return(0);		/* make the return value well-defined */
1222b7061124SArun Thomas }
1223b7061124SArun Thomas 
1224b7061124SArun Thomas /*
1225b7061124SArun Thomas  - allocset - allocate a set of characters for []
12262fe8fb19SBen Gras  == static cset *allocset(struct parse *p);
1227b7061124SArun Thomas  */
1228b7061124SArun Thomas static cset *
allocset(struct parse * p)12292fe8fb19SBen Gras allocset(
12302fe8fb19SBen Gras     struct parse *p)
1231b7061124SArun Thomas {
1232f14fb602SLionel Sambuc 	size_t no;
12332fe8fb19SBen Gras 	size_t nc;
12342fe8fb19SBen Gras 	size_t nbytes;
12352fe8fb19SBen Gras 	cset *cs;
12362fe8fb19SBen Gras 	size_t css;
1237f14fb602SLionel Sambuc 	size_t i;
1238*0a6a1f1dSLionel Sambuc 	void *old_ptr;
1239b7061124SArun Thomas 
12402fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
12412fe8fb19SBen Gras 
12422fe8fb19SBen Gras 	no = p->g->ncsets++;
12432fe8fb19SBen Gras 	css = (size_t)p->g->csetsize;
1244b7061124SArun Thomas 	if (no >= p->ncsalloc) {	/* need another column of space */
1245b7061124SArun Thomas 		p->ncsalloc += CHAR_BIT;
1246b7061124SArun Thomas 		nc = p->ncsalloc;
1247b7061124SArun Thomas 		assert(nc % CHAR_BIT == 0);
1248b7061124SArun Thomas 		nbytes = nc / CHAR_BIT * css;
1249f14fb602SLionel Sambuc 		if (MEMSIZE(p) > MEMLIMIT)
1250f14fb602SLionel Sambuc 			goto oomem;
1251*0a6a1f1dSLionel Sambuc 		if (reallocarr(&p->g->sets, nc, sizeof(cset)))
1252*0a6a1f1dSLionel Sambuc 			goto oomem;
1253*0a6a1f1dSLionel Sambuc 		old_ptr = p->g->setbits;
1254*0a6a1f1dSLionel Sambuc 		if (reallocarr(&p->g->setbits, nc / CHAR_BIT, css)) {
1255*0a6a1f1dSLionel Sambuc 			free(old_ptr);
1256*0a6a1f1dSLionel Sambuc 			goto oomem;
1257*0a6a1f1dSLionel Sambuc 		}
1258*0a6a1f1dSLionel Sambuc 		if (old_ptr != p->g->setbits) {
1259b7061124SArun Thomas 			for (i = 0; i < no; i++)
1260b7061124SArun Thomas 				p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1261b7061124SArun Thomas 		}
1262*0a6a1f1dSLionel Sambuc 		(void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
1263b7061124SArun Thomas 	}
1264b7061124SArun Thomas 
1265b7061124SArun Thomas 	cs = &p->g->sets[no];
1266b7061124SArun Thomas 	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1267f14fb602SLionel Sambuc 	cs->mask = 1 << (unsigned int)((no) % CHAR_BIT);
1268b7061124SArun Thomas 	cs->hash = 0;
1269b7061124SArun Thomas 	cs->smultis = 0;
1270b7061124SArun Thomas 	cs->multis = NULL;
1271b7061124SArun Thomas 
1272b7061124SArun Thomas 	return(cs);
1273*0a6a1f1dSLionel Sambuc 
1274*0a6a1f1dSLionel Sambuc oomem:
1275*0a6a1f1dSLionel Sambuc 	SETERROR(REG_ESPACE);
1276*0a6a1f1dSLionel Sambuc 	/* caller's responsibility not to do set ops */
1277*0a6a1f1dSLionel Sambuc 	return NULL;
1278b7061124SArun Thomas }
1279b7061124SArun Thomas 
1280b7061124SArun Thomas /*
1281b7061124SArun Thomas  - freeset - free a now-unused set
12822fe8fb19SBen Gras  == static void freeset(struct parse *p, cset *cs);
1283b7061124SArun Thomas  */
1284b7061124SArun Thomas static void
freeset(struct parse * p,cset * cs)12852fe8fb19SBen Gras freeset(
12862fe8fb19SBen Gras     struct parse *p,
12872fe8fb19SBen Gras     cset *cs)
1288b7061124SArun Thomas {
12892fe8fb19SBen Gras 	size_t i;
12902fe8fb19SBen Gras 	cset *top;
12912fe8fb19SBen Gras 	size_t css;
12922fe8fb19SBen Gras 
12932fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
12942fe8fb19SBen Gras 	_DIAGASSERT(cs != NULL);
12952fe8fb19SBen Gras 
12962fe8fb19SBen Gras 	top = &p->g->sets[p->g->ncsets];
12972fe8fb19SBen Gras 	css = (size_t)p->g->csetsize;
1298b7061124SArun Thomas 
1299b7061124SArun Thomas 	for (i = 0; i < css; i++)
1300f14fb602SLionel Sambuc 		CHsub(cs, (int)i);
1301b7061124SArun Thomas 	if (cs == top-1)	/* recover only the easy case */
1302b7061124SArun Thomas 		p->g->ncsets--;
1303b7061124SArun Thomas }
1304b7061124SArun Thomas 
1305b7061124SArun Thomas /*
1306b7061124SArun Thomas  - freezeset - final processing on a set of characters
13072fe8fb19SBen Gras  == static int freezeset(struct parse *p, cset *cs);
1308b7061124SArun Thomas  *
1309b7061124SArun Thomas  * The main task here is merging identical sets.  This is usually a waste
1310b7061124SArun Thomas  * of time (although the hash code minimizes the overhead), but can win
1311b7061124SArun Thomas  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
1312b7061124SArun Thomas  * is done using addition rather than xor -- all ASCII [aA] sets xor to
1313b7061124SArun Thomas  * the same value!
1314b7061124SArun Thomas  */
1315f14fb602SLionel Sambuc static sopno			/* set number */
freezeset(struct parse * p,cset * cs)13162fe8fb19SBen Gras freezeset(
13172fe8fb19SBen Gras     struct parse *p,
13182fe8fb19SBen Gras     cset *cs)
1319b7061124SArun Thomas {
13202fe8fb19SBen Gras 	uch h;
13212fe8fb19SBen Gras 	size_t i;
13222fe8fb19SBen Gras 	cset *top;
13232fe8fb19SBen Gras 	cset *cs2;
13242fe8fb19SBen Gras 	size_t css;
13252fe8fb19SBen Gras 
13262fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
13272fe8fb19SBen Gras 	_DIAGASSERT(cs != NULL);
13282fe8fb19SBen Gras 
13292fe8fb19SBen Gras 	h = cs->hash;
13302fe8fb19SBen Gras 	top = &p->g->sets[p->g->ncsets];
13312fe8fb19SBen Gras 	css = (size_t)p->g->csetsize;
1332b7061124SArun Thomas 
1333b7061124SArun Thomas 	/* look for an earlier one which is the same */
1334b7061124SArun Thomas 	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1335b7061124SArun Thomas 		if (cs2->hash == h && cs2 != cs) {
1336b7061124SArun Thomas 			/* maybe */
1337b7061124SArun Thomas 			for (i = 0; i < css; i++)
1338b7061124SArun Thomas 				if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1339b7061124SArun Thomas 					break;		/* no */
1340b7061124SArun Thomas 			if (i == css)
1341b7061124SArun Thomas 				break;			/* yes */
1342b7061124SArun Thomas 		}
1343b7061124SArun Thomas 
1344b7061124SArun Thomas 	if (cs2 < top) {	/* found one */
1345b7061124SArun Thomas 		freeset(p, cs);
1346b7061124SArun Thomas 		cs = cs2;
1347b7061124SArun Thomas 	}
1348b7061124SArun Thomas 
1349f14fb602SLionel Sambuc 	return (sopno)(cs - p->g->sets);
1350b7061124SArun Thomas }
1351b7061124SArun Thomas 
1352b7061124SArun Thomas /*
1353b7061124SArun Thomas  - firstch - return first character in a set (which must have at least one)
13542fe8fb19SBen Gras  == static int firstch(struct parse *p, cset *cs);
1355b7061124SArun Thomas  */
1356b7061124SArun Thomas static int			/* character; there is no "none" value */
firstch(struct parse * p,cset * cs)13572fe8fb19SBen Gras firstch(
13582fe8fb19SBen Gras     struct parse *p,
13592fe8fb19SBen Gras     cset *cs)
1360b7061124SArun Thomas {
13612fe8fb19SBen Gras 	size_t i;
13622fe8fb19SBen Gras 	size_t css;
13632fe8fb19SBen Gras 
13642fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
13652fe8fb19SBen Gras 	_DIAGASSERT(cs != NULL);
13662fe8fb19SBen Gras 
13672fe8fb19SBen Gras 	css = (size_t)p->g->csetsize;
1368b7061124SArun Thomas 
1369b7061124SArun Thomas 	for (i = 0; i < css; i++)
1370b7061124SArun Thomas 		if (CHIN(cs, i))
1371b7061124SArun Thomas 			return((char)i);
1372b7061124SArun Thomas 	assert(never);
1373b7061124SArun Thomas 	return(0);		/* arbitrary */
1374b7061124SArun Thomas }
1375b7061124SArun Thomas 
1376b7061124SArun Thomas /*
1377b7061124SArun Thomas  - nch - number of characters in a set
13782fe8fb19SBen Gras  == static int nch(struct parse *p, cset *cs);
1379b7061124SArun Thomas  */
1380b7061124SArun Thomas static int
nch(struct parse * p,cset * cs)13812fe8fb19SBen Gras nch(
13822fe8fb19SBen Gras     struct parse *p,
13832fe8fb19SBen Gras     cset *cs)
1384b7061124SArun Thomas {
13852fe8fb19SBen Gras 	size_t i;
13862fe8fb19SBen Gras 	size_t css;
13872fe8fb19SBen Gras 	int n = 0;
13882fe8fb19SBen Gras 
13892fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
13902fe8fb19SBen Gras 	_DIAGASSERT(cs != NULL);
13912fe8fb19SBen Gras 
13922fe8fb19SBen Gras 	css = (size_t)p->g->csetsize;
1393b7061124SArun Thomas 
1394b7061124SArun Thomas 	for (i = 0; i < css; i++)
1395b7061124SArun Thomas 		if (CHIN(cs, i))
1396b7061124SArun Thomas 			n++;
1397b7061124SArun Thomas 	return(n);
1398b7061124SArun Thomas }
1399b7061124SArun Thomas 
1400b7061124SArun Thomas /*
1401b7061124SArun Thomas  - mcadd - add a collating element to a cset
14022fe8fb19SBen Gras  == static void mcadd(struct parse *p, cset *cs, \
14032fe8fb19SBen Gras  ==	char *cp);
1404b7061124SArun Thomas  */
1405b7061124SArun Thomas static void
mcadd(struct parse * p,cset * cs,const char * cp)14062fe8fb19SBen Gras mcadd(
14072fe8fb19SBen Gras     struct parse *p,
14082fe8fb19SBen Gras     cset *cs,
14092fe8fb19SBen Gras     const char *cp)
1410b7061124SArun Thomas {
14112fe8fb19SBen Gras 	size_t oldend;
14122fe8fb19SBen Gras 
14132fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
14142fe8fb19SBen Gras 	_DIAGASSERT(cs != NULL);
14152fe8fb19SBen Gras 	_DIAGASSERT(cp != NULL);
14162fe8fb19SBen Gras 
14172fe8fb19SBen Gras 	oldend = cs->smultis;
1418b7061124SArun Thomas 
1419b7061124SArun Thomas 	cs->smultis += strlen(cp) + 1;
1420b7061124SArun Thomas 	if (cs->multis == NULL)
1421b7061124SArun Thomas 		cs->multis = malloc(cs->smultis);
1422b7061124SArun Thomas 	else
1423b7061124SArun Thomas 		cs->multis = realloc(cs->multis, cs->smultis);
1424b7061124SArun Thomas 	if (cs->multis == NULL) {
1425b7061124SArun Thomas 		SETERROR(REG_ESPACE);
1426b7061124SArun Thomas 		return;
1427b7061124SArun Thomas 	}
1428b7061124SArun Thomas 
1429b7061124SArun Thomas 	(void) strcpy(cs->multis + oldend - 1, cp);
1430b7061124SArun Thomas 	cs->multis[cs->smultis - 1] = '\0';
1431b7061124SArun Thomas }
1432b7061124SArun Thomas 
14332fe8fb19SBen Gras #if 0
14342fe8fb19SBen Gras /*
14352fe8fb19SBen Gras  - mcsub - subtract a collating element from a cset
14362fe8fb19SBen Gras  == static void mcsub(cset *cs, char *cp);
14372fe8fb19SBen Gras  */
14382fe8fb19SBen Gras static void
14392fe8fb19SBen Gras mcsub(
14402fe8fb19SBen Gras     cset *cs,
14412fe8fb19SBen Gras     char *cp)
14422fe8fb19SBen Gras {
14432fe8fb19SBen Gras 	char *fp;
14442fe8fb19SBen Gras 	size_t len;
14452fe8fb19SBen Gras 
14462fe8fb19SBen Gras 	_DIAGASSERT(cs != NULL);
14472fe8fb19SBen Gras 	_DIAGASSERT(cp != NULL);
14482fe8fb19SBen Gras 
14492fe8fb19SBen Gras 	fp = mcfind(cs, cp);
14502fe8fb19SBen Gras 	len = strlen(fp);
14512fe8fb19SBen Gras 
14522fe8fb19SBen Gras 	assert(fp != NULL);
14532fe8fb19SBen Gras 	(void) memmove(fp, fp + len + 1,
14542fe8fb19SBen Gras 				cs->smultis - (fp + len + 1 - cs->multis));
14552fe8fb19SBen Gras 	cs->smultis -= len;
14562fe8fb19SBen Gras 
14572fe8fb19SBen Gras 	if (cs->smultis == 0) {
14582fe8fb19SBen Gras 		free(cs->multis);
14592fe8fb19SBen Gras 		cs->multis = NULL;
14602fe8fb19SBen Gras 		return;
14612fe8fb19SBen Gras 	}
14622fe8fb19SBen Gras 
14632fe8fb19SBen Gras 	cs->multis = realloc(cs->multis, cs->smultis);
14642fe8fb19SBen Gras 	assert(cs->multis != NULL);
14652fe8fb19SBen Gras }
14662fe8fb19SBen Gras 
14672fe8fb19SBen Gras /*
14682fe8fb19SBen Gras  - mcin - is a collating element in a cset?
14692fe8fb19SBen Gras  == static int mcin(cset *cs, char *cp);
14702fe8fb19SBen Gras  */
14712fe8fb19SBen Gras static int
14722fe8fb19SBen Gras mcin(
14732fe8fb19SBen Gras     cset *cs,
14742fe8fb19SBen Gras     char *cp)
14752fe8fb19SBen Gras {
14762fe8fb19SBen Gras 
14772fe8fb19SBen Gras 	_DIAGASSERT(cs != NULL);
14782fe8fb19SBen Gras 	_DIAGASSERT(cp != NULL);
14792fe8fb19SBen Gras 
14802fe8fb19SBen Gras 	return(mcfind(cs, cp) != NULL);
14812fe8fb19SBen Gras }
14822fe8fb19SBen Gras 
14832fe8fb19SBen Gras /*
14842fe8fb19SBen Gras  - mcfind - find a collating element in a cset
14852fe8fb19SBen Gras  == static char *mcfind(cset *cs, char *cp);
14862fe8fb19SBen Gras  */
14872fe8fb19SBen Gras static char *
14882fe8fb19SBen Gras mcfind(
14892fe8fb19SBen Gras     cset *cs,
14902fe8fb19SBen Gras     char *cp)
14912fe8fb19SBen Gras {
14922fe8fb19SBen Gras 	char *p;
14932fe8fb19SBen Gras 
14942fe8fb19SBen Gras 	_DIAGASSERT(cs != NULL);
14952fe8fb19SBen Gras 	_DIAGASSERT(cp != NULL);
14962fe8fb19SBen Gras 
14972fe8fb19SBen Gras 	if (cs->multis == NULL)
14982fe8fb19SBen Gras 		return(NULL);
14992fe8fb19SBen Gras 	for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
15002fe8fb19SBen Gras 		if (strcmp(cp, p) == 0)
15012fe8fb19SBen Gras 			return(p);
15022fe8fb19SBen Gras 	return(NULL);
15032fe8fb19SBen Gras }
15042fe8fb19SBen Gras #endif
15052fe8fb19SBen Gras 
1506b7061124SArun Thomas /*
1507b7061124SArun Thomas  - mcinvert - invert the list of collating elements in a cset
15082fe8fb19SBen Gras  == static void mcinvert(struct parse *p, cset *cs);
1509b7061124SArun Thomas  *
1510b7061124SArun Thomas  * This would have to know the set of possibilities.  Implementation
1511b7061124SArun Thomas  * is deferred.
1512b7061124SArun Thomas  */
15132fe8fb19SBen Gras /* ARGSUSED */
1514b7061124SArun Thomas static void
mcinvert(struct parse * p,cset * cs)15152fe8fb19SBen Gras mcinvert(
15162fe8fb19SBen Gras     struct parse *p,
15172fe8fb19SBen Gras     cset *cs)
1518b7061124SArun Thomas {
15192fe8fb19SBen Gras 
15202fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
15212fe8fb19SBen Gras 	_DIAGASSERT(cs != NULL);
15222fe8fb19SBen Gras 
1523b7061124SArun Thomas 	assert(cs->multis == NULL);	/* xxx */
1524b7061124SArun Thomas }
1525b7061124SArun Thomas 
1526b7061124SArun Thomas /*
1527b7061124SArun Thomas  - mccase - add case counterparts of the list of collating elements in a cset
15282fe8fb19SBen Gras  == static void mccase(struct parse *p, cset *cs);
1529b7061124SArun Thomas  *
1530b7061124SArun Thomas  * This would have to know the set of possibilities.  Implementation
1531b7061124SArun Thomas  * is deferred.
1532b7061124SArun Thomas  */
15332fe8fb19SBen Gras /* ARGSUSED */
1534b7061124SArun Thomas static void
mccase(struct parse * p,cset * cs)15352fe8fb19SBen Gras mccase(
15362fe8fb19SBen Gras     struct parse *p,
15372fe8fb19SBen Gras     cset *cs)
1538b7061124SArun Thomas {
15392fe8fb19SBen Gras 
15402fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
15412fe8fb19SBen Gras 	_DIAGASSERT(cs != NULL);
15422fe8fb19SBen Gras 
1543b7061124SArun Thomas 	assert(cs->multis == NULL);	/* xxx */
1544b7061124SArun Thomas }
1545b7061124SArun Thomas 
1546b7061124SArun Thomas /*
1547b7061124SArun Thomas  - isinsets - is this character in any sets?
15482fe8fb19SBen Gras  == static int isinsets(struct re_guts *g, int c);
1549b7061124SArun Thomas  */
1550b7061124SArun Thomas static int			/* predicate */
isinsets(struct re_guts * g,int c)15512fe8fb19SBen Gras isinsets(
15522fe8fb19SBen Gras     struct re_guts *g,
15532fe8fb19SBen Gras     int c)
1554b7061124SArun Thomas {
15552fe8fb19SBen Gras 	uch *col;
1556f14fb602SLionel Sambuc 	size_t i;
1557f14fb602SLionel Sambuc 	size_t ncols;
15582fe8fb19SBen Gras 	unsigned uc = (unsigned char)c;
15592fe8fb19SBen Gras 
15602fe8fb19SBen Gras 	_DIAGASSERT(g != NULL);
15612fe8fb19SBen Gras 
1562f14fb602SLionel Sambuc 	if (g->setbits == NULL)
1563f14fb602SLionel Sambuc 		return 0;
1564f14fb602SLionel Sambuc 
15652fe8fb19SBen Gras 	ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1566b7061124SArun Thomas 
1567b7061124SArun Thomas 	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1568b7061124SArun Thomas 		if (col[uc] != 0)
1569b7061124SArun Thomas 			return(1);
1570b7061124SArun Thomas 	return(0);
1571b7061124SArun Thomas }
1572b7061124SArun Thomas 
1573b7061124SArun Thomas /*
1574b7061124SArun Thomas  - samesets - are these two characters in exactly the same sets?
15752fe8fb19SBen Gras  == static int samesets(struct re_guts *g, int c1, int c2);
1576b7061124SArun Thomas  */
1577b7061124SArun Thomas static int			/* predicate */
samesets(struct re_guts * g,int c1,int c2)15782fe8fb19SBen Gras samesets(
15792fe8fb19SBen Gras     struct re_guts *g,
15802fe8fb19SBen Gras     int c1,
15812fe8fb19SBen Gras     int c2)
1582b7061124SArun Thomas {
15832fe8fb19SBen Gras 	uch *col;
1584f14fb602SLionel Sambuc 	size_t i;
1585f14fb602SLionel Sambuc 	size_t ncols;
15862fe8fb19SBen Gras 	unsigned uc1 = (unsigned char)c1;
15872fe8fb19SBen Gras 	unsigned uc2 = (unsigned char)c2;
15882fe8fb19SBen Gras 
15892fe8fb19SBen Gras 	_DIAGASSERT(g != NULL);
15902fe8fb19SBen Gras 
15912fe8fb19SBen Gras 	ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1592b7061124SArun Thomas 
1593b7061124SArun Thomas 	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1594b7061124SArun Thomas 		if (col[uc1] != col[uc2])
1595b7061124SArun Thomas 			return(0);
1596b7061124SArun Thomas 	return(1);
1597b7061124SArun Thomas }
1598b7061124SArun Thomas 
1599b7061124SArun Thomas /*
1600b7061124SArun Thomas  - categorize - sort out character categories
16012fe8fb19SBen Gras  == static void categorize(struct parse *p, struct re_guts *g);
1602b7061124SArun Thomas  */
1603b7061124SArun Thomas static void
categorize(struct parse * p,struct re_guts * g)16042fe8fb19SBen Gras categorize(
16052fe8fb19SBen Gras     struct parse *p,
16062fe8fb19SBen Gras     struct re_guts *g)
1607b7061124SArun Thomas {
16082fe8fb19SBen Gras 	cat_t *cats;
16092fe8fb19SBen Gras 	int c;
16102fe8fb19SBen Gras 	int c2;
16112fe8fb19SBen Gras 	cat_t cat;
16122fe8fb19SBen Gras 
16132fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
16142fe8fb19SBen Gras 	_DIAGASSERT(g != NULL);
16152fe8fb19SBen Gras 
16162fe8fb19SBen Gras 	cats = g->categories;
1617b7061124SArun Thomas 
1618b7061124SArun Thomas 	/* avoid making error situations worse */
1619b7061124SArun Thomas 	if (p->error != 0)
1620b7061124SArun Thomas 		return;
1621b7061124SArun Thomas 
1622b7061124SArun Thomas 	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1623b7061124SArun Thomas 		if (cats[c] == 0 && isinsets(g, c)) {
1624f14fb602SLionel Sambuc 			_DIAGASSERT(__type_fit(unsigned char,
1625f14fb602SLionel Sambuc 			    g->ncategories + 1));
1626b7061124SArun Thomas 			cat = g->ncategories++;
1627b7061124SArun Thomas 			cats[c] = cat;
1628b7061124SArun Thomas 			for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1629b7061124SArun Thomas 				if (cats[c2] == 0 && samesets(g, c, c2))
1630b7061124SArun Thomas 					cats[c2] = cat;
1631b7061124SArun Thomas 		}
1632b7061124SArun Thomas }
1633b7061124SArun Thomas 
1634b7061124SArun Thomas /*
1635b7061124SArun Thomas  - dupl - emit a duplicate of a bunch of sops
16362fe8fb19SBen Gras  == static sopno dupl(struct parse *p, sopno start, sopno finish);
1637b7061124SArun Thomas  */
1638b7061124SArun Thomas static sopno			/* start of duplicate */
dupl(struct parse * p,sopno start,sopno finish)16392fe8fb19SBen Gras dupl(
16402fe8fb19SBen Gras     struct parse *p,
16412fe8fb19SBen Gras     sopno start,			/* from here */
16422fe8fb19SBen Gras     sopno finish)			/* to this less one */
1643b7061124SArun Thomas {
16442fe8fb19SBen Gras 	sopno ret;
16452fe8fb19SBen Gras 	sopno len = finish - start;
16462fe8fb19SBen Gras 
16472fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
16482fe8fb19SBen Gras 
16492fe8fb19SBen Gras 	ret = HERE();
1650b7061124SArun Thomas 
1651b7061124SArun Thomas 	assert(finish >= start);
1652b7061124SArun Thomas 	if (len == 0)
1653b7061124SArun Thomas 		return(ret);
1654f14fb602SLionel Sambuc 	if (!enlarge(p, p->ssize + len))/* this many unexpected additions */
1655f14fb602SLionel Sambuc 		return ret;
16562fe8fb19SBen Gras 	(void)memcpy(p->strip + p->slen, p->strip + start,
16572fe8fb19SBen Gras 	    (size_t)len * sizeof(sop));
1658b7061124SArun Thomas 	p->slen += len;
1659b7061124SArun Thomas 	return(ret);
1660b7061124SArun Thomas }
1661b7061124SArun Thomas 
1662b7061124SArun Thomas /*
1663b7061124SArun Thomas  - doemit - emit a strip operator
16642fe8fb19SBen Gras  == static void doemit(struct parse *p, sop op, size_t opnd);
1665b7061124SArun Thomas  *
1666b7061124SArun Thomas  * It might seem better to implement this as a macro with a function as
1667b7061124SArun Thomas  * hard-case backup, but it's just too big and messy unless there are
1668b7061124SArun Thomas  * some changes to the data structures.  Maybe later.
1669b7061124SArun Thomas  */
1670b7061124SArun Thomas static void
doemit(struct parse * p,sop op,sopno opnd)16712fe8fb19SBen Gras doemit(
16722fe8fb19SBen Gras     struct parse *p,
16732fe8fb19SBen Gras     sop op,
16742fe8fb19SBen Gras     sopno opnd)
1675b7061124SArun Thomas {
16762fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
16772fe8fb19SBen Gras 
1678b7061124SArun Thomas 	/* avoid making error situations worse */
1679b7061124SArun Thomas 	if (p->error != 0)
1680b7061124SArun Thomas 		return;
1681b7061124SArun Thomas 
1682b7061124SArun Thomas 	/* deal with oversize operands ("can't happen", more or less) */
1683b7061124SArun Thomas 	assert(opnd < 1<<OPSHIFT);
1684b7061124SArun Thomas 
1685b7061124SArun Thomas 	/* deal with undersized strip */
1686b7061124SArun Thomas 	if (p->slen >= p->ssize)
1687f14fb602SLionel Sambuc 		if (!enlarge(p, (p->ssize+1) / 2 * 3))	/* +50% */
1688f14fb602SLionel Sambuc 			return;
1689b7061124SArun Thomas 
1690b7061124SArun Thomas 	/* finally, it's all reduced to the easy case */
1691f14fb602SLionel Sambuc 	p->strip[p->slen++] = (sop)SOP(op, opnd);
1692b7061124SArun Thomas }
1693b7061124SArun Thomas 
1694b7061124SArun Thomas /*
1695b7061124SArun Thomas  - doinsert - insert a sop into the strip
16962fe8fb19SBen Gras  == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
1697b7061124SArun Thomas  */
1698b7061124SArun Thomas static void
doinsert(struct parse * p,sop op,sopno opnd,sopno pos)16992fe8fb19SBen Gras doinsert(
17002fe8fb19SBen Gras     struct parse *p,
17012fe8fb19SBen Gras     sop op,
17022fe8fb19SBen Gras     sopno opnd,
17032fe8fb19SBen Gras     sopno pos)
1704b7061124SArun Thomas {
17052fe8fb19SBen Gras 	sopno sn;
17062fe8fb19SBen Gras 	sop s;
17072fe8fb19SBen Gras 	int i;
17082fe8fb19SBen Gras 
17092fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
1710b7061124SArun Thomas 
1711b7061124SArun Thomas 	/* avoid making error situations worse */
1712b7061124SArun Thomas 	if (p->error != 0)
1713b7061124SArun Thomas 		return;
1714b7061124SArun Thomas 
1715b7061124SArun Thomas 	sn = HERE();
1716b7061124SArun Thomas 	EMIT(op, opnd);		/* do checks, ensure space */
1717b7061124SArun Thomas 	assert(HERE() == sn+1);
1718b7061124SArun Thomas 	s = p->strip[sn];
1719b7061124SArun Thomas 
1720b7061124SArun Thomas 	/* adjust paren pointers */
1721b7061124SArun Thomas 	assert(pos > 0);
1722b7061124SArun Thomas 	for (i = 1; i < NPAREN; i++) {
1723b7061124SArun Thomas 		if (p->pbegin[i] >= pos) {
1724b7061124SArun Thomas 			p->pbegin[i]++;
1725b7061124SArun Thomas 		}
1726b7061124SArun Thomas 		if (p->pend[i] >= pos) {
1727b7061124SArun Thomas 			p->pend[i]++;
1728b7061124SArun Thomas 		}
1729b7061124SArun Thomas 	}
1730b7061124SArun Thomas 
17312fe8fb19SBen Gras 	memmove(&p->strip[pos+1], &p->strip[pos], (HERE()-pos-1)*sizeof(sop));
1732b7061124SArun Thomas 	p->strip[pos] = s;
1733b7061124SArun Thomas }
1734b7061124SArun Thomas 
1735b7061124SArun Thomas /*
1736b7061124SArun Thomas  - dofwd - complete a forward reference
17372fe8fb19SBen Gras  == static void dofwd(struct parse *p, sopno pos, sop value);
1738b7061124SArun Thomas  */
1739b7061124SArun Thomas static void
dofwd(struct parse * p,sopno pos,sopno value)17402fe8fb19SBen Gras dofwd(
17412fe8fb19SBen Gras     struct parse *p,
17422fe8fb19SBen Gras     sopno pos,
17432fe8fb19SBen Gras     sopno value)
1744b7061124SArun Thomas {
17452fe8fb19SBen Gras 
17462fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
17472fe8fb19SBen Gras 
1748b7061124SArun Thomas 	/* avoid making error situations worse */
1749b7061124SArun Thomas 	if (p->error != 0)
1750b7061124SArun Thomas 		return;
1751b7061124SArun Thomas 
1752b7061124SArun Thomas 	assert(value < 1<<OPSHIFT);
1753f14fb602SLionel Sambuc 	p->strip[pos] = (sop)(OP(p->strip[pos]) | value);
1754b7061124SArun Thomas }
1755b7061124SArun Thomas 
1756b7061124SArun Thomas /*
1757b7061124SArun Thomas  - enlarge - enlarge the strip
17582fe8fb19SBen Gras  == static void enlarge(struct parse *p, sopno size);
1759b7061124SArun Thomas  */
1760f14fb602SLionel Sambuc static int
enlarge(struct parse * p,sopno size)1761*0a6a1f1dSLionel Sambuc enlarge(struct parse *p, sopno size)
1762b7061124SArun Thomas {
17632fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
1764b7061124SArun Thomas 
1765b7061124SArun Thomas 	if (p->ssize >= size)
1766f14fb602SLionel Sambuc 		return 1;
1767b7061124SArun Thomas 
1768*0a6a1f1dSLionel Sambuc 	if (MEMSIZE(p) > MEMLIMIT || reallocarr(&p->strip, size, sizeof(sop))) {
1769b7061124SArun Thomas 		SETERROR(REG_ESPACE);
1770f14fb602SLionel Sambuc 		return 0;
1771b7061124SArun Thomas 	}
1772*0a6a1f1dSLionel Sambuc 	p->ssize = size;
1773f14fb602SLionel Sambuc 	return 1;
1774b7061124SArun Thomas }
1775b7061124SArun Thomas 
1776b7061124SArun Thomas /*
1777b7061124SArun Thomas  - stripsnug - compact the strip
17782fe8fb19SBen Gras  == static void stripsnug(struct parse *p, struct re_guts *g);
1779b7061124SArun Thomas  */
1780b7061124SArun Thomas static void
stripsnug(struct parse * p,struct re_guts * g)17812fe8fb19SBen Gras stripsnug(
17822fe8fb19SBen Gras     struct parse *p,
17832fe8fb19SBen Gras     struct re_guts *g)
1784b7061124SArun Thomas {
17852fe8fb19SBen Gras 
17862fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
17872fe8fb19SBen Gras 	_DIAGASSERT(g != NULL);
17882fe8fb19SBen Gras 
1789b7061124SArun Thomas 	g->nstates = p->slen;
1790b7061124SArun Thomas 	g->strip = p->strip;
1791*0a6a1f1dSLionel Sambuc 	reallocarr(&g->strip, p->slen, sizeof(sop));
1792*0a6a1f1dSLionel Sambuc 	/* Ignore error as tries to free memory only. */
1793b7061124SArun Thomas }
1794b7061124SArun Thomas 
1795b7061124SArun Thomas /*
1796b7061124SArun Thomas  - findmust - fill in must and mlen with longest mandatory literal string
17972fe8fb19SBen Gras  == static void findmust(struct parse *p, struct re_guts *g);
1798b7061124SArun Thomas  *
1799b7061124SArun Thomas  * This algorithm could do fancy things like analyzing the operands of |
1800b7061124SArun Thomas  * for common subsequences.  Someday.  This code is simple and finds most
1801b7061124SArun Thomas  * of the interesting cases.
1802b7061124SArun Thomas  *
1803b7061124SArun Thomas  * Note that must and mlen got initialized during setup.
1804b7061124SArun Thomas  */
1805b7061124SArun Thomas static void
findmust(struct parse * p,struct re_guts * g)18062fe8fb19SBen Gras findmust(
18072fe8fb19SBen Gras     struct parse *p,
18082fe8fb19SBen Gras     struct re_guts *g)
1809b7061124SArun Thomas {
18102fe8fb19SBen Gras 	sop *scan;
18112fe8fb19SBen Gras 	sop *start = NULL;
18122fe8fb19SBen Gras 	sop *newstart = NULL;
18132fe8fb19SBen Gras 	sopno newlen;
18142fe8fb19SBen Gras 	sop s;
18152fe8fb19SBen Gras 	char *cp;
18162fe8fb19SBen Gras 	sopno i;
18172fe8fb19SBen Gras 
18182fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
18192fe8fb19SBen Gras 	_DIAGASSERT(g != NULL);
1820b7061124SArun Thomas 
1821b7061124SArun Thomas 	/* avoid making error situations worse */
1822b7061124SArun Thomas 	if (p->error != 0)
1823b7061124SArun Thomas 		return;
1824b7061124SArun Thomas 
1825b7061124SArun Thomas 	/* find the longest OCHAR sequence in strip */
1826b7061124SArun Thomas 	newlen = 0;
1827b7061124SArun Thomas 	scan = g->strip + 1;
1828b7061124SArun Thomas 	do {
1829b7061124SArun Thomas 		s = *scan++;
1830b7061124SArun Thomas 		switch (OP(s)) {
1831b7061124SArun Thomas 		case OCHAR:		/* sequence member */
1832b7061124SArun Thomas 			if (newlen == 0)		/* new sequence */
1833b7061124SArun Thomas 				newstart = scan - 1;
1834b7061124SArun Thomas 			newlen++;
1835b7061124SArun Thomas 			break;
1836b7061124SArun Thomas 		case OPLUS_:		/* things that don't break one */
1837b7061124SArun Thomas 		case OLPAREN:
1838b7061124SArun Thomas 		case ORPAREN:
1839b7061124SArun Thomas 			break;
1840b7061124SArun Thomas 		case OQUEST_:		/* things that must be skipped */
1841b7061124SArun Thomas 		case OCH_:
1842b7061124SArun Thomas 			scan--;
1843b7061124SArun Thomas 			do {
1844b7061124SArun Thomas 				scan += OPND(s);
1845b7061124SArun Thomas 				s = *scan;
1846b7061124SArun Thomas 				/* assert() interferes w debug printouts */
1847b7061124SArun Thomas 				if (OP(s) != O_QUEST && OP(s) != O_CH &&
1848b7061124SArun Thomas 							OP(s) != OOR2) {
1849b7061124SArun Thomas 					g->iflags |= BAD;
1850b7061124SArun Thomas 					return;
1851b7061124SArun Thomas 				}
1852b7061124SArun Thomas 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
18532fe8fb19SBen Gras 			/* FALLTHROUGH */
1854b7061124SArun Thomas 		default:		/* things that break a sequence */
1855b7061124SArun Thomas 			if (newlen > g->mlen) {		/* ends one */
1856b7061124SArun Thomas 				start = newstart;
1857b7061124SArun Thomas 				g->mlen = newlen;
1858b7061124SArun Thomas 			}
1859b7061124SArun Thomas 			newlen = 0;
1860b7061124SArun Thomas 			break;
1861b7061124SArun Thomas 		}
1862b7061124SArun Thomas 	} while (OP(s) != OEND);
1863b7061124SArun Thomas 
18642fe8fb19SBen Gras 	if (start == NULL)
18652fe8fb19SBen Gras 		g->mlen = 0;
18662fe8fb19SBen Gras 
1867b7061124SArun Thomas 	if (g->mlen == 0)	/* there isn't one */
1868b7061124SArun Thomas 		return;
1869b7061124SArun Thomas 
1870b7061124SArun Thomas 	/* turn it into a character string */
1871b7061124SArun Thomas 	g->must = malloc((size_t)g->mlen + 1);
1872b7061124SArun Thomas 	if (g->must == NULL) {		/* argh; just forget it */
1873b7061124SArun Thomas 		g->mlen = 0;
1874b7061124SArun Thomas 		return;
1875b7061124SArun Thomas 	}
1876b7061124SArun Thomas 	cp = g->must;
1877b7061124SArun Thomas 	scan = start;
1878b7061124SArun Thomas 	for (i = g->mlen; i > 0; i--) {
1879b7061124SArun Thomas 		while (OP(s = *scan++) != OCHAR)
1880b7061124SArun Thomas 			continue;
1881b7061124SArun Thomas 		assert(cp < g->must + g->mlen);
1882b7061124SArun Thomas 		*cp++ = (char)OPND(s);
1883b7061124SArun Thomas 	}
1884b7061124SArun Thomas 	assert(cp == g->must + g->mlen);
1885b7061124SArun Thomas 	*cp++ = '\0';		/* just on general principles */
1886b7061124SArun Thomas }
1887b7061124SArun Thomas 
1888b7061124SArun Thomas /*
1889b7061124SArun Thomas  - pluscount - count + nesting
18902fe8fb19SBen Gras  == static sopno pluscount(struct parse *p, struct re_guts *g);
1891b7061124SArun Thomas  */
1892b7061124SArun Thomas static sopno			/* nesting depth */
pluscount(struct parse * p,struct re_guts * g)18932fe8fb19SBen Gras pluscount(
18942fe8fb19SBen Gras     struct parse *p,
18952fe8fb19SBen Gras     struct re_guts *g)
1896b7061124SArun Thomas {
18972fe8fb19SBen Gras 	sop *scan;
18982fe8fb19SBen Gras 	sop s;
18992fe8fb19SBen Gras 	sopno plusnest = 0;
19002fe8fb19SBen Gras 	sopno maxnest = 0;
19012fe8fb19SBen Gras 
19022fe8fb19SBen Gras 	_DIAGASSERT(p != NULL);
19032fe8fb19SBen Gras 	_DIAGASSERT(g != NULL);
1904b7061124SArun Thomas 
1905b7061124SArun Thomas 	if (p->error != 0)
1906b7061124SArun Thomas 		return(0);	/* there may not be an OEND */
1907b7061124SArun Thomas 
1908b7061124SArun Thomas 	scan = g->strip + 1;
1909b7061124SArun Thomas 	do {
1910b7061124SArun Thomas 		s = *scan++;
1911b7061124SArun Thomas 		switch (OP(s)) {
1912b7061124SArun Thomas 		case OPLUS_:
1913b7061124SArun Thomas 			plusnest++;
1914b7061124SArun Thomas 			break;
1915b7061124SArun Thomas 		case O_PLUS:
1916b7061124SArun Thomas 			if (plusnest > maxnest)
1917b7061124SArun Thomas 				maxnest = plusnest;
1918b7061124SArun Thomas 			plusnest--;
1919b7061124SArun Thomas 			break;
1920b7061124SArun Thomas 		}
1921b7061124SArun Thomas 	} while (OP(s) != OEND);
1922b7061124SArun Thomas 	if (plusnest != 0)
1923b7061124SArun Thomas 		g->iflags |= BAD;
1924b7061124SArun Thomas 	return(maxnest);
1925b7061124SArun Thomas }
1926