xref: /llvm-project/llvm/lib/Support/regexec.c (revision 228e6d4cf347820fce53e57c24b266f67a413d85)
1e14d4cdbSTorok Edwin /*-
2e14d4cdbSTorok Edwin  * This code is derived from OpenBSD's libc/regex, original license follows:
3e14d4cdbSTorok Edwin  *
4e14d4cdbSTorok Edwin  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5e14d4cdbSTorok Edwin  * Copyright (c) 1992, 1993, 1994
6e14d4cdbSTorok Edwin  *	The Regents of the University of California.  All rights reserved.
7e14d4cdbSTorok Edwin  *
8e14d4cdbSTorok Edwin  * This code is derived from software contributed to Berkeley by
9e14d4cdbSTorok Edwin  * Henry Spencer.
10e14d4cdbSTorok Edwin  *
11e14d4cdbSTorok Edwin  * Redistribution and use in source and binary forms, with or without
12e14d4cdbSTorok Edwin  * modification, are permitted provided that the following conditions
13e14d4cdbSTorok Edwin  * are met:
14e14d4cdbSTorok Edwin  * 1. Redistributions of source code must retain the above copyright
15e14d4cdbSTorok Edwin  *    notice, this list of conditions and the following disclaimer.
16e14d4cdbSTorok Edwin  * 2. Redistributions in binary form must reproduce the above copyright
17e14d4cdbSTorok Edwin  *    notice, this list of conditions and the following disclaimer in the
18e14d4cdbSTorok Edwin  *    documentation and/or other materials provided with the distribution.
19e14d4cdbSTorok Edwin  * 3. Neither the name of the University nor the names of its contributors
20e14d4cdbSTorok Edwin  *    may be used to endorse or promote products derived from this software
21e14d4cdbSTorok Edwin  *    without specific prior written permission.
22e14d4cdbSTorok Edwin  *
23e14d4cdbSTorok Edwin  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24e14d4cdbSTorok Edwin  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25e14d4cdbSTorok Edwin  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26e14d4cdbSTorok Edwin  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27e14d4cdbSTorok Edwin  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28e14d4cdbSTorok Edwin  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29e14d4cdbSTorok Edwin  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30e14d4cdbSTorok Edwin  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31e14d4cdbSTorok Edwin  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32e14d4cdbSTorok Edwin  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33e14d4cdbSTorok Edwin  * SUCH DAMAGE.
34e14d4cdbSTorok Edwin  *
35e14d4cdbSTorok Edwin  *	@(#)regexec.c	8.3 (Berkeley) 3/20/94
36e14d4cdbSTorok Edwin  */
37e14d4cdbSTorok Edwin 
38e14d4cdbSTorok Edwin /*
39e14d4cdbSTorok Edwin  * the outer shell of llvm_regexec()
40e14d4cdbSTorok Edwin  *
41e14d4cdbSTorok Edwin  * This file includes engine.inc *twice*, after muchos fiddling with the
42e14d4cdbSTorok Edwin  * macros that code uses.  This lets the same code operate on two different
43e14d4cdbSTorok Edwin  * representations for state sets.
44e14d4cdbSTorok Edwin  */
45e14d4cdbSTorok Edwin #include <sys/types.h>
46e14d4cdbSTorok Edwin #include <stdio.h>
47e14d4cdbSTorok Edwin #include <stdlib.h>
48e14d4cdbSTorok Edwin #include <string.h>
49e14d4cdbSTorok Edwin #include <limits.h>
50e14d4cdbSTorok Edwin #include <ctype.h>
51e14d4cdbSTorok Edwin #include "regex_impl.h"
52e14d4cdbSTorok Edwin 
53e14d4cdbSTorok Edwin #include "regutils.h"
54e14d4cdbSTorok Edwin #include "regex2.h"
55e14d4cdbSTorok Edwin 
56e14d4cdbSTorok Edwin /* macros for manipulating states, small version */
572e071faeSNAKAMURA Takumi /* FIXME: 'states' is assumed as 'long' on small version. */
582e071faeSNAKAMURA Takumi #define	states1	long		/* for later use in llvm_regexec() decision */
592e071faeSNAKAMURA Takumi #define	states	states1
60e14d4cdbSTorok Edwin #define	CLEAR(v)	((v) = 0)
61e14d4cdbSTorok Edwin #define	SET0(v, n)	((v) &= ~((unsigned long)1 << (n)))
62e14d4cdbSTorok Edwin #define	SET1(v, n)	((v) |= (unsigned long)1 << (n))
63e14d4cdbSTorok Edwin #define	ISSET(v, n)	(((v) & ((unsigned long)1 << (n))) != 0)
64e14d4cdbSTorok Edwin #define	ASSIGN(d, s)	((d) = (s))
65e14d4cdbSTorok Edwin #define	EQ(a, b)	((a) == (b))
66e14d4cdbSTorok Edwin #define	STATEVARS	long dummy	/* dummy version */
67e14d4cdbSTorok Edwin #define	STATESETUP(m, n)	/* nothing */
68e14d4cdbSTorok Edwin #define	STATETEARDOWN(m)	/* nothing */
69e14d4cdbSTorok Edwin #define	SETUP(v)	((v) = 0)
70e14d4cdbSTorok Edwin #define	onestate	long
71e14d4cdbSTorok Edwin #define	INIT(o, n)	((o) = (unsigned long)1 << (n))
72*228e6d4cSRichard Smith #define	INC(o)		((o) = (unsigned long)(o) << 1)
73e14d4cdbSTorok Edwin #define	ISSTATEIN(v, o)	(((v) & (o)) != 0)
74e14d4cdbSTorok Edwin /* some abbreviations; note that some of these know variable names! */
75e14d4cdbSTorok Edwin /* do "if I'm here, I can also be there" etc without branches */
76e14d4cdbSTorok Edwin #define	FWD(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) << (n))
77e14d4cdbSTorok Edwin #define	BACK(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) >> (n))
78e14d4cdbSTorok Edwin #define	ISSETBACK(v, n)		(((v) & ((unsigned long)here >> (n))) != 0)
79e14d4cdbSTorok Edwin /* function names */
80e14d4cdbSTorok Edwin #define SNAMES			/* engine.inc looks after details */
81e14d4cdbSTorok Edwin 
82e14d4cdbSTorok Edwin #include "regengine.inc"
83e14d4cdbSTorok Edwin 
84e14d4cdbSTorok Edwin /* now undo things */
85e14d4cdbSTorok Edwin #undef	states
86e14d4cdbSTorok Edwin #undef	CLEAR
87e14d4cdbSTorok Edwin #undef	SET0
88e14d4cdbSTorok Edwin #undef	SET1
89e14d4cdbSTorok Edwin #undef	ISSET
90e14d4cdbSTorok Edwin #undef	ASSIGN
91e14d4cdbSTorok Edwin #undef	EQ
92e14d4cdbSTorok Edwin #undef	STATEVARS
93e14d4cdbSTorok Edwin #undef	STATESETUP
94e14d4cdbSTorok Edwin #undef	STATETEARDOWN
95e14d4cdbSTorok Edwin #undef	SETUP
96e14d4cdbSTorok Edwin #undef	onestate
97e14d4cdbSTorok Edwin #undef	INIT
98e14d4cdbSTorok Edwin #undef	INC
99e14d4cdbSTorok Edwin #undef	ISSTATEIN
100e14d4cdbSTorok Edwin #undef	FWD
101e14d4cdbSTorok Edwin #undef	BACK
102e14d4cdbSTorok Edwin #undef	ISSETBACK
103e14d4cdbSTorok Edwin #undef	SNAMES
104e14d4cdbSTorok Edwin 
105e14d4cdbSTorok Edwin /* macros for manipulating states, large version */
106e14d4cdbSTorok Edwin #define	states	char *
107e14d4cdbSTorok Edwin #define	CLEAR(v)	memset(v, 0, m->g->nstates)
108e14d4cdbSTorok Edwin #define	SET0(v, n)	((v)[n] = 0)
109e14d4cdbSTorok Edwin #define	SET1(v, n)	((v)[n] = 1)
110e14d4cdbSTorok Edwin #define	ISSET(v, n)	((v)[n])
111e14d4cdbSTorok Edwin #define	ASSIGN(d, s)	memmove(d, s, m->g->nstates)
112e14d4cdbSTorok Edwin #define	EQ(a, b)	(memcmp(a, b, m->g->nstates) == 0)
113e14d4cdbSTorok Edwin #define	STATEVARS	long vn; char *space
114e14d4cdbSTorok Edwin #define	STATESETUP(m, nv)	{ (m)->space = malloc((nv)*(m)->g->nstates); \
115e14d4cdbSTorok Edwin 				if ((m)->space == NULL) return(REG_ESPACE); \
116e14d4cdbSTorok Edwin 				(m)->vn = 0; }
117e14d4cdbSTorok Edwin #define	STATETEARDOWN(m)	{ free((m)->space); }
118e14d4cdbSTorok Edwin #define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])
119e14d4cdbSTorok Edwin #define	onestate	long
120e14d4cdbSTorok Edwin #define	INIT(o, n)	((o) = (n))
121e14d4cdbSTorok Edwin #define	INC(o)	((o)++)
122e14d4cdbSTorok Edwin #define	ISSTATEIN(v, o)	((v)[o])
123e14d4cdbSTorok Edwin /* some abbreviations; note that some of these know variable names! */
124e14d4cdbSTorok Edwin /* do "if I'm here, I can also be there" etc without branches */
125e14d4cdbSTorok Edwin #define	FWD(dst, src, n)	((dst)[here+(n)] |= (src)[here])
126e14d4cdbSTorok Edwin #define	BACK(dst, src, n)	((dst)[here-(n)] |= (src)[here])
127e14d4cdbSTorok Edwin #define	ISSETBACK(v, n)	((v)[here - (n)])
128e14d4cdbSTorok Edwin /* function names */
129e14d4cdbSTorok Edwin #define	LNAMES			/* flag */
130e14d4cdbSTorok Edwin 
131e14d4cdbSTorok Edwin #include "regengine.inc"
132e14d4cdbSTorok Edwin 
133e14d4cdbSTorok Edwin /*
134e14d4cdbSTorok Edwin  - llvm_regexec - interface for matching
135e14d4cdbSTorok Edwin  *
136e14d4cdbSTorok Edwin  * We put this here so we can exploit knowledge of the state representation
137e14d4cdbSTorok Edwin  * when choosing which matcher to call.  Also, by this point the matchers
138e14d4cdbSTorok Edwin  * have been prototyped.
139e14d4cdbSTorok Edwin  */
140e14d4cdbSTorok Edwin int				/* 0 success, REG_NOMATCH failure */
llvm_regexec(const llvm_regex_t * preg,const char * string,size_t nmatch,llvm_regmatch_t pmatch[],int eflags)141e14d4cdbSTorok Edwin llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch,
142e14d4cdbSTorok Edwin              llvm_regmatch_t pmatch[], int eflags)
143e14d4cdbSTorok Edwin {
144e14d4cdbSTorok Edwin 	struct re_guts *g = preg->re_g;
145e14d4cdbSTorok Edwin #ifdef REDEBUG
146e14d4cdbSTorok Edwin #	define	GOODFLAGS(f)	(f)
147e14d4cdbSTorok Edwin #else
148e14d4cdbSTorok Edwin #	define	GOODFLAGS(f)	((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
149e14d4cdbSTorok Edwin #endif
150e14d4cdbSTorok Edwin 
151e14d4cdbSTorok Edwin 	if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
152e14d4cdbSTorok Edwin 		return(REG_BADPAT);
153e14d4cdbSTorok Edwin 	assert(!(g->iflags&REGEX_BAD));
154e14d4cdbSTorok Edwin 	if (g->iflags&REGEX_BAD)		/* backstop for no-debug case */
155e14d4cdbSTorok Edwin 		return(REG_BADPAT);
156e14d4cdbSTorok Edwin 	eflags = GOODFLAGS(eflags);
157e14d4cdbSTorok Edwin 
158e14d4cdbSTorok Edwin 	if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE))
159f93bdd10SOwen Anderson 		return(smatcher(g, string, nmatch, pmatch, eflags));
160e14d4cdbSTorok Edwin 	else
161f93bdd10SOwen Anderson 		return(lmatcher(g, string, nmatch, pmatch, eflags));
162e14d4cdbSTorok Edwin }
163