xref: /freebsd-src/contrib/llvm-project/llvm/lib/Support/regexec.c (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric /*-
2*0b57cec5SDimitry Andric  * This code is derived from OpenBSD's libc/regex, original license follows:
3*0b57cec5SDimitry Andric  *
4*0b57cec5SDimitry Andric  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5*0b57cec5SDimitry Andric  * Copyright (c) 1992, 1993, 1994
6*0b57cec5SDimitry Andric  *	The Regents of the University of California.  All rights reserved.
7*0b57cec5SDimitry Andric  *
8*0b57cec5SDimitry Andric  * This code is derived from software contributed to Berkeley by
9*0b57cec5SDimitry Andric  * Henry Spencer.
10*0b57cec5SDimitry Andric  *
11*0b57cec5SDimitry Andric  * Redistribution and use in source and binary forms, with or without
12*0b57cec5SDimitry Andric  * modification, are permitted provided that the following conditions
13*0b57cec5SDimitry Andric  * are met:
14*0b57cec5SDimitry Andric  * 1. Redistributions of source code must retain the above copyright
15*0b57cec5SDimitry Andric  *    notice, this list of conditions and the following disclaimer.
16*0b57cec5SDimitry Andric  * 2. Redistributions in binary form must reproduce the above copyright
17*0b57cec5SDimitry Andric  *    notice, this list of conditions and the following disclaimer in the
18*0b57cec5SDimitry Andric  *    documentation and/or other materials provided with the distribution.
19*0b57cec5SDimitry Andric  * 3. Neither the name of the University nor the names of its contributors
20*0b57cec5SDimitry Andric  *    may be used to endorse or promote products derived from this software
21*0b57cec5SDimitry Andric  *    without specific prior written permission.
22*0b57cec5SDimitry Andric  *
23*0b57cec5SDimitry Andric  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24*0b57cec5SDimitry Andric  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25*0b57cec5SDimitry Andric  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26*0b57cec5SDimitry Andric  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27*0b57cec5SDimitry Andric  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28*0b57cec5SDimitry Andric  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29*0b57cec5SDimitry Andric  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30*0b57cec5SDimitry Andric  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31*0b57cec5SDimitry Andric  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32*0b57cec5SDimitry Andric  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33*0b57cec5SDimitry Andric  * SUCH DAMAGE.
34*0b57cec5SDimitry Andric  *
35*0b57cec5SDimitry Andric  *	@(#)regexec.c	8.3 (Berkeley) 3/20/94
36*0b57cec5SDimitry Andric  */
37*0b57cec5SDimitry Andric 
38*0b57cec5SDimitry Andric /*
39*0b57cec5SDimitry Andric  * the outer shell of llvm_regexec()
40*0b57cec5SDimitry Andric  *
41*0b57cec5SDimitry Andric  * This file includes engine.inc *twice*, after muchos fiddling with the
42*0b57cec5SDimitry Andric  * macros that code uses.  This lets the same code operate on two different
43*0b57cec5SDimitry Andric  * representations for state sets.
44*0b57cec5SDimitry Andric  */
45*0b57cec5SDimitry Andric #include <sys/types.h>
46*0b57cec5SDimitry Andric #include <stdio.h>
47*0b57cec5SDimitry Andric #include <stdlib.h>
48*0b57cec5SDimitry Andric #include <string.h>
49*0b57cec5SDimitry Andric #include <limits.h>
50*0b57cec5SDimitry Andric #include <ctype.h>
51*0b57cec5SDimitry Andric #include "regex_impl.h"
52*0b57cec5SDimitry Andric 
53*0b57cec5SDimitry Andric #include "regutils.h"
54*0b57cec5SDimitry Andric #include "regex2.h"
55*0b57cec5SDimitry Andric 
56*0b57cec5SDimitry Andric /* macros for manipulating states, small version */
57*0b57cec5SDimitry Andric /* FIXME: 'states' is assumed as 'long' on small version. */
58*0b57cec5SDimitry Andric #define	states1	long		/* for later use in llvm_regexec() decision */
59*0b57cec5SDimitry Andric #define	states	states1
60*0b57cec5SDimitry Andric #define	CLEAR(v)	((v) = 0)
61*0b57cec5SDimitry Andric #define	SET0(v, n)	((v) &= ~((unsigned long)1 << (n)))
62*0b57cec5SDimitry Andric #define	SET1(v, n)	((v) |= (unsigned long)1 << (n))
63*0b57cec5SDimitry Andric #define	ISSET(v, n)	(((v) & ((unsigned long)1 << (n))) != 0)
64*0b57cec5SDimitry Andric #define	ASSIGN(d, s)	((d) = (s))
65*0b57cec5SDimitry Andric #define	EQ(a, b)	((a) == (b))
66*0b57cec5SDimitry Andric #define	STATEVARS	long dummy	/* dummy version */
67*0b57cec5SDimitry Andric #define	STATESETUP(m, n)	/* nothing */
68*0b57cec5SDimitry Andric #define	STATETEARDOWN(m)	/* nothing */
69*0b57cec5SDimitry Andric #define	SETUP(v)	((v) = 0)
70*0b57cec5SDimitry Andric #define	onestate	long
71*0b57cec5SDimitry Andric #define	INIT(o, n)	((o) = (unsigned long)1 << (n))
72*0b57cec5SDimitry Andric #define	INC(o)		((o) = (unsigned long)(o) << 1)
73*0b57cec5SDimitry Andric #define	ISSTATEIN(v, o)	(((v) & (o)) != 0)
74*0b57cec5SDimitry Andric /* some abbreviations; note that some of these know variable names! */
75*0b57cec5SDimitry Andric /* do "if I'm here, I can also be there" etc without branches */
76*0b57cec5SDimitry Andric #define	FWD(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) << (n))
77*0b57cec5SDimitry Andric #define	BACK(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) >> (n))
78*0b57cec5SDimitry Andric #define	ISSETBACK(v, n)		(((v) & ((unsigned long)here >> (n))) != 0)
79*0b57cec5SDimitry Andric /* function names */
80*0b57cec5SDimitry Andric #define SNAMES			/* engine.inc looks after details */
81*0b57cec5SDimitry Andric 
82*0b57cec5SDimitry Andric #include "regengine.inc"
83*0b57cec5SDimitry Andric 
84*0b57cec5SDimitry Andric /* now undo things */
85*0b57cec5SDimitry Andric #undef	states
86*0b57cec5SDimitry Andric #undef	CLEAR
87*0b57cec5SDimitry Andric #undef	SET0
88*0b57cec5SDimitry Andric #undef	SET1
89*0b57cec5SDimitry Andric #undef	ISSET
90*0b57cec5SDimitry Andric #undef	ASSIGN
91*0b57cec5SDimitry Andric #undef	EQ
92*0b57cec5SDimitry Andric #undef	STATEVARS
93*0b57cec5SDimitry Andric #undef	STATESETUP
94*0b57cec5SDimitry Andric #undef	STATETEARDOWN
95*0b57cec5SDimitry Andric #undef	SETUP
96*0b57cec5SDimitry Andric #undef	onestate
97*0b57cec5SDimitry Andric #undef	INIT
98*0b57cec5SDimitry Andric #undef	INC
99*0b57cec5SDimitry Andric #undef	ISSTATEIN
100*0b57cec5SDimitry Andric #undef	FWD
101*0b57cec5SDimitry Andric #undef	BACK
102*0b57cec5SDimitry Andric #undef	ISSETBACK
103*0b57cec5SDimitry Andric #undef	SNAMES
104*0b57cec5SDimitry Andric 
105*0b57cec5SDimitry Andric /* macros for manipulating states, large version */
106*0b57cec5SDimitry Andric #define	states	char *
107*0b57cec5SDimitry Andric #define	CLEAR(v)	memset(v, 0, m->g->nstates)
108*0b57cec5SDimitry Andric #define	SET0(v, n)	((v)[n] = 0)
109*0b57cec5SDimitry Andric #define	SET1(v, n)	((v)[n] = 1)
110*0b57cec5SDimitry Andric #define	ISSET(v, n)	((v)[n])
111*0b57cec5SDimitry Andric #define	ASSIGN(d, s)	memmove(d, s, m->g->nstates)
112*0b57cec5SDimitry Andric #define	EQ(a, b)	(memcmp(a, b, m->g->nstates) == 0)
113*0b57cec5SDimitry Andric #define	STATEVARS	long vn; char *space
114*0b57cec5SDimitry Andric #define	STATESETUP(m, nv)	{ (m)->space = malloc((nv)*(m)->g->nstates); \
115*0b57cec5SDimitry Andric 				if ((m)->space == NULL) return(REG_ESPACE); \
116*0b57cec5SDimitry Andric 				(m)->vn = 0; }
117*0b57cec5SDimitry Andric #define	STATETEARDOWN(m)	{ free((m)->space); }
118*0b57cec5SDimitry Andric #define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])
119*0b57cec5SDimitry Andric #define	onestate	long
120*0b57cec5SDimitry Andric #define	INIT(o, n)	((o) = (n))
121*0b57cec5SDimitry Andric #define	INC(o)	((o)++)
122*0b57cec5SDimitry Andric #define	ISSTATEIN(v, o)	((v)[o])
123*0b57cec5SDimitry Andric /* some abbreviations; note that some of these know variable names! */
124*0b57cec5SDimitry Andric /* do "if I'm here, I can also be there" etc without branches */
125*0b57cec5SDimitry Andric #define	FWD(dst, src, n)	((dst)[here+(n)] |= (src)[here])
126*0b57cec5SDimitry Andric #define	BACK(dst, src, n)	((dst)[here-(n)] |= (src)[here])
127*0b57cec5SDimitry Andric #define	ISSETBACK(v, n)	((v)[here - (n)])
128*0b57cec5SDimitry Andric /* function names */
129*0b57cec5SDimitry Andric #define	LNAMES			/* flag */
130*0b57cec5SDimitry Andric 
131*0b57cec5SDimitry Andric #include "regengine.inc"
132*0b57cec5SDimitry Andric 
133*0b57cec5SDimitry Andric /*
134*0b57cec5SDimitry Andric  - llvm_regexec - interface for matching
135*0b57cec5SDimitry Andric  *
136*0b57cec5SDimitry Andric  * We put this here so we can exploit knowledge of the state representation
137*0b57cec5SDimitry Andric  * when choosing which matcher to call.  Also, by this point the matchers
138*0b57cec5SDimitry Andric  * have been prototyped.
139*0b57cec5SDimitry Andric  */
140*0b57cec5SDimitry Andric int				/* 0 success, REG_NOMATCH failure */
llvm_regexec(const llvm_regex_t * preg,const char * string,size_t nmatch,llvm_regmatch_t pmatch[],int eflags)141*0b57cec5SDimitry Andric llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch,
142*0b57cec5SDimitry Andric              llvm_regmatch_t pmatch[], int eflags)
143*0b57cec5SDimitry Andric {
144*0b57cec5SDimitry Andric 	struct re_guts *g = preg->re_g;
145*0b57cec5SDimitry Andric #ifdef REDEBUG
146*0b57cec5SDimitry Andric #	define	GOODFLAGS(f)	(f)
147*0b57cec5SDimitry Andric #else
148*0b57cec5SDimitry Andric #	define	GOODFLAGS(f)	((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
149*0b57cec5SDimitry Andric #endif
150*0b57cec5SDimitry Andric 
151*0b57cec5SDimitry Andric 	if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
152*0b57cec5SDimitry Andric 		return(REG_BADPAT);
153*0b57cec5SDimitry Andric 	assert(!(g->iflags&REGEX_BAD));
154*0b57cec5SDimitry Andric 	if (g->iflags&REGEX_BAD)		/* backstop for no-debug case */
155*0b57cec5SDimitry Andric 		return(REG_BADPAT);
156*0b57cec5SDimitry Andric 	eflags = GOODFLAGS(eflags);
157*0b57cec5SDimitry Andric 
158*0b57cec5SDimitry Andric 	if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE))
159*0b57cec5SDimitry Andric 		return(smatcher(g, string, nmatch, pmatch, eflags));
160*0b57cec5SDimitry Andric 	else
161*0b57cec5SDimitry Andric 		return(lmatcher(g, string, nmatch, pmatch, eflags));
162*0b57cec5SDimitry Andric }
163