1*f09b3194Schristos /* $NetBSD: regexec.c,v 1.26 2021/02/26 19:24:47 christos Exp $ */
2eb7c1594Sagc
3eb7c1594Sagc /*-
41ee269c3Schristos * SPDX-License-Identifier: BSD-3-Clause
51ee269c3Schristos *
61ee269c3Schristos * Copyright (c) 1992, 1993, 1994 Henry Spencer.
7eb7c1594Sagc * Copyright (c) 1992, 1993, 1994
8eb7c1594Sagc * The Regents of the University of California. All rights reserved.
9eb7c1594Sagc *
10eb7c1594Sagc * This code is derived from software contributed to Berkeley by
11eb7c1594Sagc * Henry Spencer.
12eb7c1594Sagc *
13eb7c1594Sagc * Redistribution and use in source and binary forms, with or without
14eb7c1594Sagc * modification, are permitted provided that the following conditions
15eb7c1594Sagc * are met:
16eb7c1594Sagc * 1. Redistributions of source code must retain the above copyright
17eb7c1594Sagc * notice, this list of conditions and the following disclaimer.
18eb7c1594Sagc * 2. Redistributions in binary form must reproduce the above copyright
19eb7c1594Sagc * notice, this list of conditions and the following disclaimer in the
20eb7c1594Sagc * documentation and/or other materials provided with the distribution.
21eb7c1594Sagc * 3. Neither the name of the University nor the names of its contributors
22eb7c1594Sagc * may be used to endorse or promote products derived from this software
23eb7c1594Sagc * without specific prior written permission.
24eb7c1594Sagc *
25eb7c1594Sagc * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26eb7c1594Sagc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27eb7c1594Sagc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28eb7c1594Sagc * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29eb7c1594Sagc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30eb7c1594Sagc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31eb7c1594Sagc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32eb7c1594Sagc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33eb7c1594Sagc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34eb7c1594Sagc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35eb7c1594Sagc * SUCH DAMAGE.
36eb7c1594Sagc *
37eb7c1594Sagc * @(#)regexec.c 8.3 (Berkeley) 3/20/94
38eb7c1594Sagc */
392c84ad3aScgd
40*f09b3194Schristos #if HAVE_NBTOOL_CONFIG_H
41*f09b3194Schristos #include "nbtool_config.h"
42*f09b3194Schristos #endif
43*f09b3194Schristos
445f61e3e2Schristos #include <sys/cdefs.h>
452c84ad3aScgd #if 0
467c6ed81dScgd static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94";
471ee269c3Schristos __FBSDID("$FreeBSD: head/lib/libc/regex/regexec.c 326025 2017-11-20 19:49:47Z pfg $");
482c84ad3aScgd #endif
49*f09b3194Schristos __RCSID("$NetBSD: regexec.c,v 1.26 2021/02/26 19:24:47 christos Exp $");
507c6ed81dScgd
51b90ff831Sjtc /*
52b90ff831Sjtc * the outer shell of regexec()
53b90ff831Sjtc *
541ee269c3Schristos * This file includes engine.c three times, after muchos fiddling with the
55b90ff831Sjtc * macros that code uses. This lets the same code operate on two different
561ee269c3Schristos * representations for state sets and characters.
57b90ff831Sjtc */
581ee269c3Schristos
59e37fce91Schristos #ifndef LIBHACK
6043fa6fe3Sjtc #include "namespace.h"
61e37fce91Schristos #endif
62b90ff831Sjtc #include <sys/types.h>
63b90ff831Sjtc #include <stdio.h>
64b90ff831Sjtc #include <stdlib.h>
65b90ff831Sjtc #include <string.h>
661ee269c3Schristos #include <limits.h>
671ee269c3Schristos #include <ctype.h>
68931a89e7Sjunyoung #include <regex.h>
69b90ff831Sjtc
70e37fce91Schristos #if defined(__weak_alias) && !defined(LIBHACK)
__weak_alias(regexec,_regexec)7160549036Smycroft __weak_alias(regexec,_regexec)
7243fa6fe3Sjtc #endif
7343fa6fe3Sjtc
74b90ff831Sjtc #include "utils.h"
75b90ff831Sjtc #include "regex2.h"
76b90ff831Sjtc
771ee269c3Schristos static __inline size_t
781ee269c3Schristos xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
791ee269c3Schristos {
80e37fce91Schristos #ifdef NLS
811ee269c3Schristos size_t nr;
821ee269c3Schristos wchar_t wc;
831ee269c3Schristos
841ee269c3Schristos nr = mbrtowc(&wc, s, n, mbs);
851ee269c3Schristos if (wi != NULL)
861ee269c3Schristos *wi = wc;
871ee269c3Schristos if (nr == 0)
881ee269c3Schristos return (1);
891ee269c3Schristos else if (nr == (size_t)-1 || nr == (size_t)-2) {
901ee269c3Schristos memset(mbs, 0, sizeof(*mbs));
911ee269c3Schristos if (wi != NULL)
921ee269c3Schristos *wi = dummy;
931ee269c3Schristos return (1);
941ee269c3Schristos } else
951ee269c3Schristos return (nr);
96e37fce91Schristos #else
97e37fce91Schristos if (wi)
98e37fce91Schristos *wi = *s;
99e37fce91Schristos return 1;
100e37fce91Schristos #endif
1011ee269c3Schristos }
1021ee269c3Schristos
1031ee269c3Schristos static __inline size_t
xmbrtowc_dummy(wint_t * wi,const char * s,size_t n __unused,mbstate_t * mbs __unused,wint_t dummy __unused)1041ee269c3Schristos xmbrtowc_dummy(wint_t *wi,
1051ee269c3Schristos const char *s,
1061ee269c3Schristos size_t n __unused,
1071ee269c3Schristos mbstate_t *mbs __unused,
1081ee269c3Schristos wint_t dummy __unused)
1091ee269c3Schristos {
1101ee269c3Schristos
1111ee269c3Schristos if (wi != NULL)
1121ee269c3Schristos *wi = (unsigned char)*s;
1131ee269c3Schristos return (1);
1141ee269c3Schristos }
1151ee269c3Schristos
116b90ff831Sjtc /* macros for manipulating states, small version */
1171ee269c3Schristos #define states long
1181ee269c3Schristos #define states1 states /* for later use in regexec() decision */
119b90ff831Sjtc #define CLEAR(v) ((v) = 0)
120c300f8f4Sdrochner #define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
121c300f8f4Sdrochner #define SET1(v, n) ((v) |= (unsigned long)1 << (n))
122c300f8f4Sdrochner #define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
123b90ff831Sjtc #define ASSIGN(d, s) ((d) = (s))
124b90ff831Sjtc #define EQ(a, b) ((a) == (b))
1251ee269c3Schristos #define STATEVARS long dummy /* dummy version */
126b90ff831Sjtc #define STATESETUP(m, n) /* nothing */
127b90ff831Sjtc #define STATETEARDOWN(m) /* nothing */
128b90ff831Sjtc #define SETUP(v) ((v) = 0)
1291ee269c3Schristos #define onestate long
130c300f8f4Sdrochner #define INIT(o, n) ((o) = (unsigned long)1 << (n))
131eea7e63fScgd #define INC(o) ((o) <<= 1)
132da1fb002Scgd #define ISSTATEIN(v, o) (((v) & (o)) != 0)
133b90ff831Sjtc /* some abbreviations; note that some of these know variable names! */
134b90ff831Sjtc /* do "if I'm here, I can also be there" etc without branches */
135c300f8f4Sdrochner #define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n))
136c300f8f4Sdrochner #define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n))
137c300f8f4Sdrochner #define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
1381ee269c3Schristos /* no multibyte support */
1391ee269c3Schristos #define XMBRTOWC xmbrtowc_dummy
1401ee269c3Schristos #define ZAPSTATE(mbs) ((void)(mbs))
141b90ff831Sjtc /* function names */
142b90ff831Sjtc #define SNAMES /* engine.c looks after details */
143b90ff831Sjtc
144b90ff831Sjtc #include "engine.c"
145b90ff831Sjtc
146b90ff831Sjtc /* now undo things */
147b90ff831Sjtc #undef states
148b90ff831Sjtc #undef CLEAR
149b90ff831Sjtc #undef SET0
150b90ff831Sjtc #undef SET1
151b90ff831Sjtc #undef ISSET
152b90ff831Sjtc #undef ASSIGN
153b90ff831Sjtc #undef EQ
154b90ff831Sjtc #undef STATEVARS
155b90ff831Sjtc #undef STATESETUP
156b90ff831Sjtc #undef STATETEARDOWN
157b90ff831Sjtc #undef SETUP
158b90ff831Sjtc #undef onestate
159b90ff831Sjtc #undef INIT
160b90ff831Sjtc #undef INC
161b90ff831Sjtc #undef ISSTATEIN
162b90ff831Sjtc #undef FWD
163b90ff831Sjtc #undef BACK
164b90ff831Sjtc #undef ISSETBACK
165b90ff831Sjtc #undef SNAMES
1661ee269c3Schristos #undef XMBRTOWC
1671ee269c3Schristos #undef ZAPSTATE
168b90ff831Sjtc
169b90ff831Sjtc /* macros for manipulating states, large version */
170b90ff831Sjtc #define states char *
1711ee269c3Schristos #define CLEAR(v) memset(v, 0, m->g->nstates)
172b90ff831Sjtc #define SET0(v, n) ((v)[n] = 0)
173b90ff831Sjtc #define SET1(v, n) ((v)[n] = 1)
174b90ff831Sjtc #define ISSET(v, n) ((v)[n])
1751ee269c3Schristos #define ASSIGN(d, s) memcpy(d, s, m->g->nstates)
1761ee269c3Schristos #define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
1771ee269c3Schristos #define STATEVARS long vn; char *space
1781ee269c3Schristos #define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
1791ee269c3Schristos if ((m)->space == NULL) return(REG_ESPACE); \
1801ee269c3Schristos (m)->vn = 0; }
1811ee269c3Schristos #define STATETEARDOWN(m) { free((m)->space); }
1821ee269c3Schristos #define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
1831ee269c3Schristos #define onestate long
1841ee269c3Schristos #define INIT(o, n) ((o) = (n))
185b90ff831Sjtc #define INC(o) ((o)++)
186b90ff831Sjtc #define ISSTATEIN(v, o) ((v)[o])
187b90ff831Sjtc /* some abbreviations; note that some of these know variable names! */
188b90ff831Sjtc /* do "if I'm here, I can also be there" etc without branches */
189b90ff831Sjtc #define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
190b90ff831Sjtc #define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
191b90ff831Sjtc #define ISSETBACK(v, n) ((v)[here - (n)])
1921ee269c3Schristos /* no multibyte support */
1931ee269c3Schristos #define XMBRTOWC xmbrtowc_dummy
1941ee269c3Schristos #define ZAPSTATE(mbs) ((void)(mbs))
195b90ff831Sjtc /* function names */
196b90ff831Sjtc #define LNAMES /* flag */
197b90ff831Sjtc
198b90ff831Sjtc #include "engine.c"
199b90ff831Sjtc
2001ee269c3Schristos /* multibyte character & large states version */
2011ee269c3Schristos #undef LNAMES
2021ee269c3Schristos #undef XMBRTOWC
2031ee269c3Schristos #undef ZAPSTATE
2041ee269c3Schristos #define XMBRTOWC xmbrtowc
2051ee269c3Schristos #define ZAPSTATE(mbs) memset((mbs), 0, sizeof(*(mbs)))
2061ee269c3Schristos #define MNAMES
2071ee269c3Schristos
2081ee269c3Schristos #include "engine.c"
2091ee269c3Schristos
210b90ff831Sjtc /*
211b90ff831Sjtc - regexec - interface for matching
2126931099eSjtc = extern int regexec(const regex_t *, const char *, size_t, \
2136931099eSjtc = regmatch_t [], int);
214b90ff831Sjtc = #define REG_NOTBOL 00001
215b90ff831Sjtc = #define REG_NOTEOL 00002
216b90ff831Sjtc = #define REG_STARTEND 00004
217b90ff831Sjtc = #define REG_TRACE 00400 // tracing of execution
218b90ff831Sjtc = #define REG_LARGE 01000 // force large representation
219b90ff831Sjtc = #define REG_BACKR 02000 // force use of backref code
220b90ff831Sjtc *
221b90ff831Sjtc * We put this here so we can exploit knowledge of the state representation
222b90ff831Sjtc * when choosing which matcher to call. Also, by this point the matchers
223b90ff831Sjtc * have been prototyped.
224b90ff831Sjtc */
225b90ff831Sjtc int /* 0 success, REG_NOMATCH failure */
regexec(const regex_t * __restrict preg,const char * __restrict string,size_t nmatch,regmatch_t pmatch[__restrict],int eflags)2261ee269c3Schristos regexec(const regex_t * __restrict preg,
2271ee269c3Schristos const char * __restrict string,
2289641e3f0Sjunyoung size_t nmatch,
2291ee269c3Schristos regmatch_t pmatch[__restrict],
2309641e3f0Sjunyoung int eflags)
231b90ff831Sjtc {
232c8bafd62Sperry struct re_guts *g = preg->re_g;
233b90ff831Sjtc #ifdef REDEBUG
234b90ff831Sjtc # define GOODFLAGS(f) (f)
235b90ff831Sjtc #else
236b90ff831Sjtc # define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
237b90ff831Sjtc #endif
238b48252f3Slukem _DIAGASSERT(preg != NULL);
239b48252f3Slukem _DIAGASSERT(string != NULL);
240b48252f3Slukem
241b90ff831Sjtc if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
242b90ff831Sjtc return(REG_BADPAT);
243b90ff831Sjtc assert(!(g->iflags&BAD));
244b90ff831Sjtc if (g->iflags&BAD) /* backstop for no-debug case */
245b90ff831Sjtc return(REG_BADPAT);
2463ed83140Sjtc eflags = GOODFLAGS(eflags);
247b90ff831Sjtc
2481ee269c3Schristos if (MB_CUR_MAX > 1)
2491ee269c3Schristos return(mmatcher(g, string, nmatch, pmatch, eflags));
2501ee269c3Schristos else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE))
2511ee269c3Schristos return(smatcher(g, string, nmatch, pmatch, eflags));
252b90ff831Sjtc else
2531ee269c3Schristos return(lmatcher(g, string, nmatch, pmatch, eflags));
254b90ff831Sjtc }
255