xref: /csrg-svn/lib/libc/regex/regexec.c (revision 56355)
155851Sbostic /*-
255851Sbostic  * Copyright (c) 1992 Henry Spencer.
355851Sbostic  * Copyright (c) 1992 The Regents of the University of California.
455851Sbostic  * All rights reserved.
555851Sbostic  *
655851Sbostic  * This code is derived from software contributed to Berkeley by
755851Sbostic  * Henry Spencer of the University of Toronto.
855851Sbostic  *
955851Sbostic  * %sccs.include.redist.c%
1055851Sbostic  *
11*56355Sbostic  *	@(#)regexec.c	5.2 (Berkeley) 09/30/92
1255851Sbostic  */
1355851Sbostic 
1455851Sbostic #if defined(LIBC_SCCS) && !defined(lint)
15*56355Sbostic static char sccsid[] = "@(#)regexec.c	5.2 (Berkeley) 09/30/92";
1655851Sbostic #endif /* LIBC_SCCS and not lint */
1755851Sbostic 
18*56355Sbostic /*
19*56355Sbostic  * the outer shell of regexec()
20*56355Sbostic  *
21*56355Sbostic  * This file includes engine.c *twice*, after muchos fiddling with the
22*56355Sbostic  * macros that code uses.  This lets the same code operate on two different
23*56355Sbostic  * representations for state sets.
24*56355Sbostic  */
2555851Sbostic #include <sys/types.h>
2655851Sbostic #include <stdio.h>
2755851Sbostic #include <stdlib.h>
2855851Sbostic #include <string.h>
2955851Sbostic #include <limits.h>
30*56355Sbostic #include <ctype.h>
3155851Sbostic #include <regex.h>
3255851Sbostic 
3355851Sbostic #include "utils.h"
3455851Sbostic #include "regex2.h"
3555851Sbostic 
/*
 * macros for manipulating states, small version
 *
 * A state set is a bit mask held in one long: bit n is set when state n
 * is in the set.  regexec() selects this representation whenever the
 * number of states fits in CHAR_BIT*sizeof(states1) bits, so every bit
 * position of a long must be usable.  All bit arithmetic is therefore
 * done in unsigned long: shifting a plain int 1 by the width of int or
 * more is undefined, and right-shifting a negative value would smear the
 * sign bit across the set.
 */
#define	states	long
#define	states1	states		/* for later use in regexec() decision */
#define	CLEAR(v)	((v) = 0)
#define	SET0(v, n)	((v) &= ~((unsigned long)1 << (n)))
#define	SET1(v, n)	((v) |= (unsigned long)1 << (n))
#define	ISSET(v, n)	(((v) & ((unsigned long)1 << (n))) != 0)
#define	ASSIGN(d, s)	((d) = (s))
#define	EQ(a, b)	((a) == (b))
#define	STATEVARS	int dummy	/* dummy version */
#define	STATESETUP(m, n)	/* nothing */
#define	STATETEARDOWN(m)	/* nothing */
#define	SETUP(v)	((v) = 0)
/* a single state is a one-bit mask, so it must be as wide as a state set */
#define	onestate	long
#define	INIT(o, n)	((o) = (onestate)((unsigned long)1 << (n)))
/* o must be a plain lvalue; it is evaluated twice */
#define	INC(o)	((o) = (onestate)((unsigned long)(o) << 1))
#define	ISSTATEIN(v, o)	(((unsigned long)(v) & (unsigned long)(o)) != 0)
/* some abbreviations; note that some of these know variable names! */
/* do "if I'm here, I can also be there" etc without branches */
#define	FWD(dst, src, n)	\
	((dst) |= ((unsigned long)(src) & (unsigned long)(here)) << (n))
#define	BACK(dst, src, n)	\
	((dst) |= ((unsigned long)(src) & (unsigned long)(here)) >> (n))
#define	ISSETBACK(v, n)	\
	(((unsigned long)(v) & ((unsigned long)(here) >> (n))) != 0)
/* function names */
#define SNAMES			/* engine.c looks after details */
6055851Sbostic 
6155851Sbostic #include "engine.c"
6255851Sbostic 
/*
 * Retract the small-version definitions so the large version can define
 * the same names afresh.  states1 is deliberately left defined: regexec()
 * uses sizeof(states1) when choosing a matcher.
 */

/* the types and the per-match bookkeeping */
#undef	states
#undef	onestate
#undef	STATEVARS
#undef	STATESETUP
#undef	STATETEARDOWN
#undef	SETUP

/* whole-set operations */
#undef	CLEAR
#undef	ASSIGN
#undef	EQ

/* single-state operations */
#undef	SET0
#undef	SET1
#undef	ISSET
#undef	INIT
#undef	INC
#undef	ISSTATEIN

/* the abbreviations that know about "here" */
#undef	FWD
#undef	BACK
#undef	ISSETBACK

/* function-name selector */
#undef	SNAMES
8355851Sbostic 
/*
 * macros for manipulating states, large version
 *
 * A state set is a vector of m->g->nstates chars, one char per state,
 * nonzero meaning "in the set".  STATESETUP obtains room for nv such
 * vectors with a single malloc; SETUP hands out the next unused vector.
 * Several of these macros refer to a variable named m.
 */
#define	states		char *
#define	CLEAR(v)	memset((v), 0, m->g->nstates)
#define	SET0(v, n)	((v)[(n)] = 0)
#define	SET1(v, n)	((v)[(n)] = 1)
#define	ISSET(v, n)	((v)[(n)])
#define	ASSIGN(d, s)	memcpy((d), (s), m->g->nstates)
#define	EQ(a, b)	(memcmp((a), (b), m->g->nstates) == 0)
#define	STATEVARS	int vn; char *space
/* returns REG_ESPACE from the enclosing function if the malloc fails */
#define	STATESETUP(m, nv) \
	{ \
		(m)->space = malloc((nv)*(m)->g->nstates); \
		if ((m)->space == NULL) \
			return(REG_ESPACE); \
		(m)->vn = 0; \
	}
#define	STATETEARDOWN(m) \
	{ \
		free((m)->space); \
	}
#define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])
/* a single state is simply its index into the vector */
#define	onestate	int
#define	INIT(o, n)	((o) = (n))
#define	INC(o)		((o)++)
#define	ISSTATEIN(v, o)	((v)[(o)])
/* shorthand; note that these rely on a variable named "here"! */
/* "if I'm here, I can also be there" and the like, done without branches */
#define	FWD(dst, src, n)	((dst)[here + (n)] |= (src)[here])
#define	BACK(dst, src, n)	((dst)[here - (n)] |= (src)[here])
#define	ISSETBACK(v, n)		((v)[here - (n)])
/* function names */
#define	LNAMES			/* flag */
10955851Sbostic 
11055851Sbostic #include "engine.c"
111*56355Sbostic 
112*56355Sbostic /*
113*56355Sbostic  - regexec - interface for matching
114*56355Sbostic  = extern int regexec(const regex_t *preg, const char *string, size_t nmatch, \
115*56355Sbostic  =					regmatch_t pmatch[], int eflags);
116*56355Sbostic  = #define	REG_NOTBOL	00001
117*56355Sbostic  = #define	REG_NOTEOL	00002
118*56355Sbostic  = #define	REG_STARTEND	00004
119*56355Sbostic  = #define	REG_TRACE	00400
120*56355Sbostic  = #define	REG_LARGE	01000
121*56355Sbostic  = #define	REG_BACKR	02000
122*56355Sbostic  *
123*56355Sbostic  * We put this here so we can exploit knowledge of the state representation
124*56355Sbostic  * when choosing which matcher to call.  Also, by this point the matchers
125*56355Sbostic  * have been prototyped.
126*56355Sbostic  */
127*56355Sbostic int				/* 0 success, REG_NOMATCH failure */
128*56355Sbostic regexec(preg, string, nmatch, pmatch, eflags)
129*56355Sbostic const regex_t *preg;
130*56355Sbostic const char *string;
131*56355Sbostic size_t nmatch;
132*56355Sbostic regmatch_t pmatch[];
133*56355Sbostic int eflags;
134*56355Sbostic {
135*56355Sbostic 	register struct re_guts *g = preg->re_g;
136*56355Sbostic #ifdef REDEBUG
137*56355Sbostic #	define	GOODFLAGS(f)	(f)
138*56355Sbostic #else
139*56355Sbostic #	define	GOODFLAGS(f)	((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
140*56355Sbostic #endif
141*56355Sbostic 
142*56355Sbostic 	if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
143*56355Sbostic 		return(REG_BADPAT);
144*56355Sbostic 	assert(!(g->iflags&BAD));
145*56355Sbostic 	if (g->iflags&BAD)		/* backstop for no-debug case */
146*56355Sbostic 		return(REG_BADPAT);
147*56355Sbostic 	eflags = GOODFLAGS(eflags);	/* xxx should we complain? */
148*56355Sbostic 
149*56355Sbostic 	if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
150*56355Sbostic 		return(smatcher(g, (uchar *)string, nmatch, pmatch, eflags));
151*56355Sbostic 	else
152*56355Sbostic 		return(lmatcher(g, (uchar *)string, nmatch, pmatch, eflags));
153*56355Sbostic }
154