1*0b57cec5SDimitry Andric /*-
2*0b57cec5SDimitry Andric * This code is derived from OpenBSD's libc/regex, original license follows:
3*0b57cec5SDimitry Andric *
4*0b57cec5SDimitry Andric * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5*0b57cec5SDimitry Andric * Copyright (c) 1992, 1993, 1994
6*0b57cec5SDimitry Andric * The Regents of the University of California. All rights reserved.
7*0b57cec5SDimitry Andric *
8*0b57cec5SDimitry Andric * This code is derived from software contributed to Berkeley by
9*0b57cec5SDimitry Andric * Henry Spencer.
10*0b57cec5SDimitry Andric *
11*0b57cec5SDimitry Andric * Redistribution and use in source and binary forms, with or without
12*0b57cec5SDimitry Andric * modification, are permitted provided that the following conditions
13*0b57cec5SDimitry Andric * are met:
14*0b57cec5SDimitry Andric * 1. Redistributions of source code must retain the above copyright
15*0b57cec5SDimitry Andric * notice, this list of conditions and the following disclaimer.
16*0b57cec5SDimitry Andric * 2. Redistributions in binary form must reproduce the above copyright
17*0b57cec5SDimitry Andric * notice, this list of conditions and the following disclaimer in the
18*0b57cec5SDimitry Andric * documentation and/or other materials provided with the distribution.
19*0b57cec5SDimitry Andric * 3. Neither the name of the University nor the names of its contributors
20*0b57cec5SDimitry Andric * may be used to endorse or promote products derived from this software
21*0b57cec5SDimitry Andric * without specific prior written permission.
22*0b57cec5SDimitry Andric *
23*0b57cec5SDimitry Andric * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24*0b57cec5SDimitry Andric * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25*0b57cec5SDimitry Andric * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26*0b57cec5SDimitry Andric * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27*0b57cec5SDimitry Andric * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28*0b57cec5SDimitry Andric * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29*0b57cec5SDimitry Andric * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30*0b57cec5SDimitry Andric * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31*0b57cec5SDimitry Andric * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32*0b57cec5SDimitry Andric * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33*0b57cec5SDimitry Andric * SUCH DAMAGE.
34*0b57cec5SDimitry Andric *
35*0b57cec5SDimitry Andric * @(#)regexec.c 8.3 (Berkeley) 3/20/94
36*0b57cec5SDimitry Andric */
37*0b57cec5SDimitry Andric
38*0b57cec5SDimitry Andric /*
39*0b57cec5SDimitry Andric * the outer shell of llvm_regexec()
40*0b57cec5SDimitry Andric *
41*0b57cec5SDimitry Andric * This file includes engine.inc *twice*, after muchos fiddling with the
42*0b57cec5SDimitry Andric * macros that code uses. This lets the same code operate on two different
43*0b57cec5SDimitry Andric * representations for state sets.
44*0b57cec5SDimitry Andric */
45*0b57cec5SDimitry Andric #include <sys/types.h>
46*0b57cec5SDimitry Andric #include <stdio.h>
47*0b57cec5SDimitry Andric #include <stdlib.h>
48*0b57cec5SDimitry Andric #include <string.h>
49*0b57cec5SDimitry Andric #include <limits.h>
50*0b57cec5SDimitry Andric #include <ctype.h>
51*0b57cec5SDimitry Andric #include "regex_impl.h"
52*0b57cec5SDimitry Andric
53*0b57cec5SDimitry Andric #include "regutils.h"
54*0b57cec5SDimitry Andric #include "regex2.h"
55*0b57cec5SDimitry Andric
56*0b57cec5SDimitry Andric /* macros for manipulating states, small version */
57*0b57cec5SDimitry Andric /* FIXME: 'states' is assumed as 'long' on small version. */
58*0b57cec5SDimitry Andric #define states1 long /* for later use in llvm_regexec() decision */
59*0b57cec5SDimitry Andric #define states states1
60*0b57cec5SDimitry Andric #define CLEAR(v) ((v) = 0)
61*0b57cec5SDimitry Andric #define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
62*0b57cec5SDimitry Andric #define SET1(v, n) ((v) |= (unsigned long)1 << (n))
63*0b57cec5SDimitry Andric #define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
64*0b57cec5SDimitry Andric #define ASSIGN(d, s) ((d) = (s))
65*0b57cec5SDimitry Andric #define EQ(a, b) ((a) == (b))
66*0b57cec5SDimitry Andric #define STATEVARS long dummy /* dummy version */
67*0b57cec5SDimitry Andric #define STATESETUP(m, n) /* nothing */
68*0b57cec5SDimitry Andric #define STATETEARDOWN(m) /* nothing */
69*0b57cec5SDimitry Andric #define SETUP(v) ((v) = 0)
70*0b57cec5SDimitry Andric #define onestate long
71*0b57cec5SDimitry Andric #define INIT(o, n) ((o) = (unsigned long)1 << (n))
72*0b57cec5SDimitry Andric #define INC(o) ((o) = (unsigned long)(o) << 1)
73*0b57cec5SDimitry Andric #define ISSTATEIN(v, o) (((v) & (o)) != 0)
74*0b57cec5SDimitry Andric /* some abbreviations; note that some of these know variable names! */
75*0b57cec5SDimitry Andric /* do "if I'm here, I can also be there" etc without branches */
76*0b57cec5SDimitry Andric #define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n))
77*0b57cec5SDimitry Andric #define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n))
78*0b57cec5SDimitry Andric #define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
79*0b57cec5SDimitry Andric /* function names */
80*0b57cec5SDimitry Andric #define SNAMES /* engine.inc looks after details */
81*0b57cec5SDimitry Andric
82*0b57cec5SDimitry Andric #include "regengine.inc"
83*0b57cec5SDimitry Andric
84*0b57cec5SDimitry Andric /* now undo things */
85*0b57cec5SDimitry Andric #undef states
86*0b57cec5SDimitry Andric #undef CLEAR
87*0b57cec5SDimitry Andric #undef SET0
88*0b57cec5SDimitry Andric #undef SET1
89*0b57cec5SDimitry Andric #undef ISSET
90*0b57cec5SDimitry Andric #undef ASSIGN
91*0b57cec5SDimitry Andric #undef EQ
92*0b57cec5SDimitry Andric #undef STATEVARS
93*0b57cec5SDimitry Andric #undef STATESETUP
94*0b57cec5SDimitry Andric #undef STATETEARDOWN
95*0b57cec5SDimitry Andric #undef SETUP
96*0b57cec5SDimitry Andric #undef onestate
97*0b57cec5SDimitry Andric #undef INIT
98*0b57cec5SDimitry Andric #undef INC
99*0b57cec5SDimitry Andric #undef ISSTATEIN
100*0b57cec5SDimitry Andric #undef FWD
101*0b57cec5SDimitry Andric #undef BACK
102*0b57cec5SDimitry Andric #undef ISSETBACK
103*0b57cec5SDimitry Andric #undef SNAMES
104*0b57cec5SDimitry Andric
105*0b57cec5SDimitry Andric /* macros for manipulating states, large version */
106*0b57cec5SDimitry Andric #define states char *
107*0b57cec5SDimitry Andric #define CLEAR(v) memset(v, 0, m->g->nstates)
108*0b57cec5SDimitry Andric #define SET0(v, n) ((v)[n] = 0)
109*0b57cec5SDimitry Andric #define SET1(v, n) ((v)[n] = 1)
110*0b57cec5SDimitry Andric #define ISSET(v, n) ((v)[n])
111*0b57cec5SDimitry Andric #define ASSIGN(d, s) memmove(d, s, m->g->nstates)
112*0b57cec5SDimitry Andric #define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
113*0b57cec5SDimitry Andric #define STATEVARS long vn; char *space
114*0b57cec5SDimitry Andric #define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
115*0b57cec5SDimitry Andric if ((m)->space == NULL) return(REG_ESPACE); \
116*0b57cec5SDimitry Andric (m)->vn = 0; }
117*0b57cec5SDimitry Andric #define STATETEARDOWN(m) { free((m)->space); }
118*0b57cec5SDimitry Andric #define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
119*0b57cec5SDimitry Andric #define onestate long
120*0b57cec5SDimitry Andric #define INIT(o, n) ((o) = (n))
121*0b57cec5SDimitry Andric #define INC(o) ((o)++)
122*0b57cec5SDimitry Andric #define ISSTATEIN(v, o) ((v)[o])
123*0b57cec5SDimitry Andric /* some abbreviations; note that some of these know variable names! */
124*0b57cec5SDimitry Andric /* do "if I'm here, I can also be there" etc without branches */
125*0b57cec5SDimitry Andric #define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
126*0b57cec5SDimitry Andric #define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
127*0b57cec5SDimitry Andric #define ISSETBACK(v, n) ((v)[here - (n)])
128*0b57cec5SDimitry Andric /* function names */
129*0b57cec5SDimitry Andric #define LNAMES /* flag */
130*0b57cec5SDimitry Andric
131*0b57cec5SDimitry Andric #include "regengine.inc"
132*0b57cec5SDimitry Andric
133*0b57cec5SDimitry Andric /*
134*0b57cec5SDimitry Andric - llvm_regexec - interface for matching
135*0b57cec5SDimitry Andric *
136*0b57cec5SDimitry Andric * We put this here so we can exploit knowledge of the state representation
137*0b57cec5SDimitry Andric * when choosing which matcher to call. Also, by this point the matchers
138*0b57cec5SDimitry Andric * have been prototyped.
139*0b57cec5SDimitry Andric */
140*0b57cec5SDimitry Andric int /* 0 success, REG_NOMATCH failure */
llvm_regexec(const llvm_regex_t * preg,const char * string,size_t nmatch,llvm_regmatch_t pmatch[],int eflags)141*0b57cec5SDimitry Andric llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch,
142*0b57cec5SDimitry Andric llvm_regmatch_t pmatch[], int eflags)
143*0b57cec5SDimitry Andric {
144*0b57cec5SDimitry Andric struct re_guts *g = preg->re_g;
145*0b57cec5SDimitry Andric #ifdef REDEBUG
146*0b57cec5SDimitry Andric # define GOODFLAGS(f) (f)
147*0b57cec5SDimitry Andric #else
148*0b57cec5SDimitry Andric # define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
149*0b57cec5SDimitry Andric #endif
150*0b57cec5SDimitry Andric
151*0b57cec5SDimitry Andric if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
152*0b57cec5SDimitry Andric return(REG_BADPAT);
153*0b57cec5SDimitry Andric assert(!(g->iflags®EX_BAD));
154*0b57cec5SDimitry Andric if (g->iflags®EX_BAD) /* backstop for no-debug case */
155*0b57cec5SDimitry Andric return(REG_BADPAT);
156*0b57cec5SDimitry Andric eflags = GOODFLAGS(eflags);
157*0b57cec5SDimitry Andric
158*0b57cec5SDimitry Andric if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags®_LARGE))
159*0b57cec5SDimitry Andric return(smatcher(g, string, nmatch, pmatch, eflags));
160*0b57cec5SDimitry Andric else
161*0b57cec5SDimitry Andric return(lmatcher(g, string, nmatch, pmatch, eflags));
162*0b57cec5SDimitry Andric }
163