xref: /netbsd-src/lib/libc/regex/regexec.c (revision f09b31947cf29e1e0627e7ba33f9f2058d7c3c88)
1*f09b3194Schristos /*	$NetBSD: regexec.c,v 1.26 2021/02/26 19:24:47 christos Exp $	*/
2eb7c1594Sagc 
3eb7c1594Sagc /*-
41ee269c3Schristos  * SPDX-License-Identifier: BSD-3-Clause
51ee269c3Schristos  *
61ee269c3Schristos  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
7eb7c1594Sagc  * Copyright (c) 1992, 1993, 1994
8eb7c1594Sagc  *	The Regents of the University of California.  All rights reserved.
9eb7c1594Sagc  *
10eb7c1594Sagc  * This code is derived from software contributed to Berkeley by
11eb7c1594Sagc  * Henry Spencer.
12eb7c1594Sagc  *
13eb7c1594Sagc  * Redistribution and use in source and binary forms, with or without
14eb7c1594Sagc  * modification, are permitted provided that the following conditions
15eb7c1594Sagc  * are met:
16eb7c1594Sagc  * 1. Redistributions of source code must retain the above copyright
17eb7c1594Sagc  *    notice, this list of conditions and the following disclaimer.
18eb7c1594Sagc  * 2. Redistributions in binary form must reproduce the above copyright
19eb7c1594Sagc  *    notice, this list of conditions and the following disclaimer in the
20eb7c1594Sagc  *    documentation and/or other materials provided with the distribution.
21eb7c1594Sagc  * 3. Neither the name of the University nor the names of its contributors
22eb7c1594Sagc  *    may be used to endorse or promote products derived from this software
23eb7c1594Sagc  *    without specific prior written permission.
24eb7c1594Sagc  *
25eb7c1594Sagc  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26eb7c1594Sagc  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27eb7c1594Sagc  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28eb7c1594Sagc  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29eb7c1594Sagc  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30eb7c1594Sagc  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31eb7c1594Sagc  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32eb7c1594Sagc  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33eb7c1594Sagc  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34eb7c1594Sagc  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35eb7c1594Sagc  * SUCH DAMAGE.
36eb7c1594Sagc  *
37eb7c1594Sagc  *	@(#)regexec.c	8.3 (Berkeley) 3/20/94
38eb7c1594Sagc  */
392c84ad3aScgd 
40*f09b3194Schristos #if HAVE_NBTOOL_CONFIG_H
41*f09b3194Schristos #include "nbtool_config.h"
42*f09b3194Schristos #endif
43*f09b3194Schristos 
445f61e3e2Schristos #include <sys/cdefs.h>
452c84ad3aScgd #if 0
467c6ed81dScgd static char sccsid[] = "@(#)regexec.c	8.3 (Berkeley) 3/20/94";
471ee269c3Schristos __FBSDID("$FreeBSD: head/lib/libc/regex/regexec.c 326025 2017-11-20 19:49:47Z pfg $");
482c84ad3aScgd #endif
49*f09b3194Schristos __RCSID("$NetBSD: regexec.c,v 1.26 2021/02/26 19:24:47 christos Exp $");
507c6ed81dScgd 
51b90ff831Sjtc /*
52b90ff831Sjtc  * the outer shell of regexec()
53b90ff831Sjtc  *
541ee269c3Schristos  * This file includes engine.c three times, after muchos fiddling with the
55b90ff831Sjtc  * macros that code uses.  This lets the same code operate on two different
561ee269c3Schristos  * representations for state sets and characters.
57b90ff831Sjtc  */
581ee269c3Schristos 
59e37fce91Schristos #ifndef LIBHACK
6043fa6fe3Sjtc #include "namespace.h"
61e37fce91Schristos #endif
62b90ff831Sjtc #include <sys/types.h>
63b90ff831Sjtc #include <stdio.h>
64b90ff831Sjtc #include <stdlib.h>
65b90ff831Sjtc #include <string.h>
661ee269c3Schristos #include <limits.h>
671ee269c3Schristos #include <ctype.h>
68931a89e7Sjunyoung #include <regex.h>
69b90ff831Sjtc 
70e37fce91Schristos #if defined(__weak_alias) && !defined(LIBHACK)
__weak_alias(regexec,_regexec)7160549036Smycroft __weak_alias(regexec,_regexec)
7243fa6fe3Sjtc #endif
7343fa6fe3Sjtc 
74b90ff831Sjtc #include "utils.h"
75b90ff831Sjtc #include "regex2.h"
76b90ff831Sjtc 
771ee269c3Schristos static __inline size_t
781ee269c3Schristos xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
791ee269c3Schristos {
80e37fce91Schristos #ifdef NLS
811ee269c3Schristos 	size_t nr;
821ee269c3Schristos 	wchar_t wc;
831ee269c3Schristos 
841ee269c3Schristos 	nr = mbrtowc(&wc, s, n, mbs);
851ee269c3Schristos 	if (wi != NULL)
861ee269c3Schristos 		*wi = wc;
871ee269c3Schristos 	if (nr == 0)
881ee269c3Schristos 		return (1);
891ee269c3Schristos 	else if (nr == (size_t)-1 || nr == (size_t)-2) {
901ee269c3Schristos 		memset(mbs, 0, sizeof(*mbs));
911ee269c3Schristos 		if (wi != NULL)
921ee269c3Schristos 			*wi = dummy;
931ee269c3Schristos 		return (1);
941ee269c3Schristos 	} else
951ee269c3Schristos                 return (nr);
96e37fce91Schristos #else
97e37fce91Schristos 	if (wi)
98e37fce91Schristos 		*wi = *s;
99e37fce91Schristos 	return 1;
100e37fce91Schristos #endif
1011ee269c3Schristos }
1021ee269c3Schristos 
1031ee269c3Schristos static __inline size_t
xmbrtowc_dummy(wint_t * wi,const char * s,size_t n __unused,mbstate_t * mbs __unused,wint_t dummy __unused)1041ee269c3Schristos xmbrtowc_dummy(wint_t *wi,
1051ee269c3Schristos 		const char *s,
1061ee269c3Schristos 		size_t n __unused,
1071ee269c3Schristos 		mbstate_t *mbs __unused,
1081ee269c3Schristos 		wint_t dummy __unused)
1091ee269c3Schristos {
1101ee269c3Schristos 
1111ee269c3Schristos 	if (wi != NULL)
1121ee269c3Schristos 		*wi = (unsigned char)*s;
1131ee269c3Schristos 	return (1);
1141ee269c3Schristos }
1151ee269c3Schristos 
116b90ff831Sjtc /* macros for manipulating states, small version */
1171ee269c3Schristos #define	states	long
1181ee269c3Schristos #define	states1	states		/* for later use in regexec() decision */
119b90ff831Sjtc #define	CLEAR(v)	((v) = 0)
120c300f8f4Sdrochner #define	SET0(v, n)	((v) &= ~((unsigned long)1 << (n)))
121c300f8f4Sdrochner #define	SET1(v, n)	((v) |= (unsigned long)1 << (n))
122c300f8f4Sdrochner #define	ISSET(v, n)	(((v) & ((unsigned long)1 << (n))) != 0)
123b90ff831Sjtc #define	ASSIGN(d, s)	((d) = (s))
124b90ff831Sjtc #define	EQ(a, b)	((a) == (b))
1251ee269c3Schristos #define	STATEVARS	long dummy	/* dummy version */
126b90ff831Sjtc #define	STATESETUP(m, n)	/* nothing */
127b90ff831Sjtc #define	STATETEARDOWN(m)	/* nothing */
128b90ff831Sjtc #define	SETUP(v)	((v) = 0)
1291ee269c3Schristos #define	onestate	long
130c300f8f4Sdrochner #define	INIT(o, n)	((o) = (unsigned long)1 << (n))
131eea7e63fScgd #define	INC(o)	((o) <<= 1)
132da1fb002Scgd #define	ISSTATEIN(v, o)	(((v) & (o)) != 0)
133b90ff831Sjtc /* some abbreviations; note that some of these know variable names! */
134b90ff831Sjtc /* do "if I'm here, I can also be there" etc without branches */
135c300f8f4Sdrochner #define	FWD(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) << (n))
136c300f8f4Sdrochner #define	BACK(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) >> (n))
137c300f8f4Sdrochner #define	ISSETBACK(v, n)	(((v) & ((unsigned long)here >> (n))) != 0)
1381ee269c3Schristos /* no multibyte support */
1391ee269c3Schristos #define	XMBRTOWC	xmbrtowc_dummy
1401ee269c3Schristos #define	ZAPSTATE(mbs)	((void)(mbs))
141b90ff831Sjtc /* function names */
142b90ff831Sjtc #define SNAMES			/* engine.c looks after details */
143b90ff831Sjtc 
144b90ff831Sjtc #include "engine.c"
145b90ff831Sjtc 
146b90ff831Sjtc /* now undo things */
147b90ff831Sjtc #undef	states
148b90ff831Sjtc #undef	CLEAR
149b90ff831Sjtc #undef	SET0
150b90ff831Sjtc #undef	SET1
151b90ff831Sjtc #undef	ISSET
152b90ff831Sjtc #undef	ASSIGN
153b90ff831Sjtc #undef	EQ
154b90ff831Sjtc #undef	STATEVARS
155b90ff831Sjtc #undef	STATESETUP
156b90ff831Sjtc #undef	STATETEARDOWN
157b90ff831Sjtc #undef	SETUP
158b90ff831Sjtc #undef	onestate
159b90ff831Sjtc #undef	INIT
160b90ff831Sjtc #undef	INC
161b90ff831Sjtc #undef	ISSTATEIN
162b90ff831Sjtc #undef	FWD
163b90ff831Sjtc #undef	BACK
164b90ff831Sjtc #undef	ISSETBACK
165b90ff831Sjtc #undef	SNAMES
1661ee269c3Schristos #undef	XMBRTOWC
1671ee269c3Schristos #undef	ZAPSTATE
168b90ff831Sjtc 
169b90ff831Sjtc /* macros for manipulating states, large version */
170b90ff831Sjtc #define	states	char *
1711ee269c3Schristos #define	CLEAR(v)	memset(v, 0, m->g->nstates)
172b90ff831Sjtc #define	SET0(v, n)	((v)[n] = 0)
173b90ff831Sjtc #define	SET1(v, n)	((v)[n] = 1)
174b90ff831Sjtc #define	ISSET(v, n)	((v)[n])
1751ee269c3Schristos #define	ASSIGN(d, s)	memcpy(d, s, m->g->nstates)
1761ee269c3Schristos #define	EQ(a, b)	(memcmp(a, b, m->g->nstates) == 0)
1771ee269c3Schristos #define	STATEVARS	long vn; char *space
1781ee269c3Schristos #define	STATESETUP(m, nv)	{ (m)->space = malloc((nv)*(m)->g->nstates); \
1791ee269c3Schristos 				if ((m)->space == NULL) return(REG_ESPACE); \
1801ee269c3Schristos 				(m)->vn = 0; }
1811ee269c3Schristos #define	STATETEARDOWN(m)	{ free((m)->space); }
1821ee269c3Schristos #define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])
1831ee269c3Schristos #define	onestate	long
1841ee269c3Schristos #define	INIT(o, n)	((o) = (n))
185b90ff831Sjtc #define	INC(o)	((o)++)
186b90ff831Sjtc #define	ISSTATEIN(v, o)	((v)[o])
187b90ff831Sjtc /* some abbreviations; note that some of these know variable names! */
188b90ff831Sjtc /* do "if I'm here, I can also be there" etc without branches */
189b90ff831Sjtc #define	FWD(dst, src, n)	((dst)[here+(n)] |= (src)[here])
190b90ff831Sjtc #define	BACK(dst, src, n)	((dst)[here-(n)] |= (src)[here])
191b90ff831Sjtc #define	ISSETBACK(v, n)	((v)[here - (n)])
1921ee269c3Schristos /* no multibyte support */
1931ee269c3Schristos #define	XMBRTOWC	xmbrtowc_dummy
1941ee269c3Schristos #define	ZAPSTATE(mbs)	((void)(mbs))
195b90ff831Sjtc /* function names */
196b90ff831Sjtc #define	LNAMES			/* flag */
197b90ff831Sjtc 
198b90ff831Sjtc #include "engine.c"
199b90ff831Sjtc 
2001ee269c3Schristos /* multibyte character & large states version */
2011ee269c3Schristos #undef	LNAMES
2021ee269c3Schristos #undef	XMBRTOWC
2031ee269c3Schristos #undef	ZAPSTATE
2041ee269c3Schristos #define	XMBRTOWC	xmbrtowc
2051ee269c3Schristos #define	ZAPSTATE(mbs)	memset((mbs), 0, sizeof(*(mbs)))
2061ee269c3Schristos #define	MNAMES
2071ee269c3Schristos 
2081ee269c3Schristos #include "engine.c"
2091ee269c3Schristos 
210b90ff831Sjtc /*
211b90ff831Sjtc  - regexec - interface for matching
2126931099eSjtc  = extern int regexec(const regex_t *, const char *, size_t, \
2136931099eSjtc  =					regmatch_t [], int);
214b90ff831Sjtc  = #define	REG_NOTBOL	00001
215b90ff831Sjtc  = #define	REG_NOTEOL	00002
216b90ff831Sjtc  = #define	REG_STARTEND	00004
217b90ff831Sjtc  = #define	REG_TRACE	00400	// tracing of execution
218b90ff831Sjtc  = #define	REG_LARGE	01000	// force large representation
219b90ff831Sjtc  = #define	REG_BACKR	02000	// force use of backref code
220b90ff831Sjtc  *
221b90ff831Sjtc  * We put this here so we can exploit knowledge of the state representation
222b90ff831Sjtc  * when choosing which matcher to call.  Also, by this point the matchers
223b90ff831Sjtc  * have been prototyped.
224b90ff831Sjtc  */
225b90ff831Sjtc int				/* 0 success, REG_NOMATCH failure */
regexec(const regex_t * __restrict preg,const char * __restrict string,size_t nmatch,regmatch_t pmatch[__restrict],int eflags)2261ee269c3Schristos regexec(const regex_t * __restrict preg,
2271ee269c3Schristos 	const char * __restrict string,
2289641e3f0Sjunyoung 	size_t nmatch,
2291ee269c3Schristos 	regmatch_t pmatch[__restrict],
2309641e3f0Sjunyoung 	int eflags)
231b90ff831Sjtc {
232c8bafd62Sperry 	struct re_guts *g = preg->re_g;
233b90ff831Sjtc #ifdef REDEBUG
234b90ff831Sjtc #	define	GOODFLAGS(f)	(f)
235b90ff831Sjtc #else
236b90ff831Sjtc #	define	GOODFLAGS(f)	((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
237b90ff831Sjtc #endif
238b48252f3Slukem 	_DIAGASSERT(preg != NULL);
239b48252f3Slukem 	_DIAGASSERT(string != NULL);
240b48252f3Slukem 
241b90ff831Sjtc 	if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
242b90ff831Sjtc 		return(REG_BADPAT);
243b90ff831Sjtc 	assert(!(g->iflags&BAD));
244b90ff831Sjtc 	if (g->iflags&BAD)		/* backstop for no-debug case */
245b90ff831Sjtc 		return(REG_BADPAT);
2463ed83140Sjtc 	eflags = GOODFLAGS(eflags);
247b90ff831Sjtc 
2481ee269c3Schristos 	if (MB_CUR_MAX > 1)
2491ee269c3Schristos 		return(mmatcher(g, string, nmatch, pmatch, eflags));
2501ee269c3Schristos 	else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
2511ee269c3Schristos 		return(smatcher(g, string, nmatch, pmatch, eflags));
252b90ff831Sjtc 	else
2531ee269c3Schristos 		return(lmatcher(g, string, nmatch, pmatch, eflags));
254b90ff831Sjtc }
255