1*4887Schin /***********************************************************************
2*4887Schin *                                                                      *
3*4887Schin *               This software is part of the ast package               *
4*4887Schin *           Copyright (c) 1985-2007 AT&T Knowledge Ventures            *
5*4887Schin *                      and is licensed under the                       *
6*4887Schin *                  Common Public License, Version 1.0                  *
7*4887Schin *                      by AT&T Knowledge Ventures                      *
8*4887Schin *                                                                      *
9*4887Schin *                A copy of the License is available at                 *
10*4887Schin *            http://www.opensource.org/licenses/cpl1.0.txt             *
11*4887Schin *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12*4887Schin *                                                                      *
13*4887Schin *              Information and Software Systems Research               *
14*4887Schin *                            AT&T Research                             *
15*4887Schin *                           Florham Park NJ                            *
16*4887Schin *                                                                      *
17*4887Schin *                 Glenn Fowler <gsf@research.att.com>                  *
18*4887Schin *                  David Korn <dgk@research.att.com>                   *
19*4887Schin *                   Phong Vo <kpv@research.att.com>                    *
20*4887Schin *                                                                      *
21*4887Schin ***********************************************************************/
22*4887Schin #pragma prototyped
23*4887Schin 
24*4887Schin /*
25*4887Schin  * posix regex record executor
26*4887Schin  * multiple record sized-buffer interface
27*4887Schin  */
28*4887Schin 
29*4887Schin #include "reglib.h"
30*4887Schin 
31*4887Schin /*
32*4887Schin  * call regnexec() on records selected by Boyer-Moore
33*4887Schin  */
34*4887Schin 
35*4887Schin int
36*4887Schin regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record)
37*4887Schin {
38*4887Schin 	register unsigned char*	buf = (unsigned char*)s;
39*4887Schin 	register unsigned char*	beg;
40*4887Schin 	register unsigned char*	l;
41*4887Schin 	register unsigned char*	r;
42*4887Schin 	register unsigned char*	x;
43*4887Schin 	register size_t*	skip;
44*4887Schin 	register size_t*	fail;
45*4887Schin 	register Bm_mask_t**	mask;
46*4887Schin 	register size_t		index;
47*4887Schin 	register int		n;
48*4887Schin 	unsigned char*		end;
49*4887Schin 	size_t			mid;
50*4887Schin 	int			complete;
51*4887Schin 	int			exactlen;
52*4887Schin 	int			leftlen;
53*4887Schin 	int			rightlen;
54*4887Schin 	int			inv;
55*4887Schin 	Bm_mask_t		m;
56*4887Schin 	Env_t*			env;
57*4887Schin 	Rex_t*			e;
58*4887Schin 
59*4887Schin 	if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM)
60*4887Schin 		return REG_BADPAT;
61*4887Schin 	inv = (flags & REG_INVERT) != 0;
62*4887Schin 	buf = beg = (unsigned char*)s;
63*4887Schin 	end = buf + len;
64*4887Schin 	mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right);
65*4887Schin 	skip = e->re.bm.skip;
66*4887Schin 	fail = e->re.bm.fail;
67*4887Schin 	mask = e->re.bm.mask;
68*4887Schin 	complete = e->re.bm.complete && !nmatch;
69*4887Schin 	exactlen = e->re.bm.size;
70*4887Schin 	leftlen = e->re.bm.left + exactlen;
71*4887Schin 	rightlen = exactlen + e->re.bm.right;
72*4887Schin 	index = leftlen++;
73*4887Schin 	for (;;)
74*4887Schin 	{
75*4887Schin 		while ((index += skip[buf[index]]) < mid);
76*4887Schin 		if (index < HIT)
77*4887Schin 			goto impossible;
78*4887Schin 		index -= HIT;
79*4887Schin 		m = mask[n = exactlen - 1][buf[index]];
80*4887Schin 		do
81*4887Schin 		{
82*4887Schin 			if (!n--)
83*4887Schin 				goto possible;
84*4887Schin 		} while (m &= mask[n][buf[--index]]);
85*4887Schin 		if ((index += fail[n + 1]) < len)
86*4887Schin 			continue;
87*4887Schin  impossible:
88*4887Schin 		if (inv)
89*4887Schin 		{
90*4887Schin 			l = r = buf + len;
91*4887Schin 			goto invert;
92*4887Schin 		}
93*4887Schin 		n = 0;
94*4887Schin 		goto done;
95*4887Schin  possible:
96*4887Schin 		r = (l = buf + index) + exactlen;
97*4887Schin 		while (l > beg)
98*4887Schin 			if (*--l == sep)
99*4887Schin 			{
100*4887Schin 				l++;
101*4887Schin 				break;
102*4887Schin 			}
103*4887Schin 		if ((r - l) < leftlen)
104*4887Schin 			goto spanned;
105*4887Schin 		while (r < end && *r != sep)
106*4887Schin 			r++;
107*4887Schin 		if ((r - (buf + index)) < rightlen)
108*4887Schin 			goto spanned;
109*4887Schin 		if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags)))
110*4887Schin 		{
111*4887Schin 			if (inv)
112*4887Schin 			{
113*4887Schin  invert:
114*4887Schin 				x = beg;
115*4887Schin 				while (beg < l)
116*4887Schin 				{
117*4887Schin 					while (x < l && *x != sep)
118*4887Schin 						x++;
119*4887Schin 					if (n = (*record)(handle, (char*)beg, x - beg))
120*4887Schin 						goto done;
121*4887Schin 					beg = ++x;
122*4887Schin 				}
123*4887Schin 			}
124*4887Schin 			else if (n = (*record)(handle, (char*)l, r - l))
125*4887Schin 				goto done;
126*4887Schin 			if ((index = (r - buf) + leftlen) >= len)
127*4887Schin 			{
128*4887Schin 				n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0;
129*4887Schin 				goto done;
130*4887Schin 			}
131*4887Schin 			beg = r + 1;
132*4887Schin 		}
133*4887Schin 		else if (n != REG_NOMATCH)
134*4887Schin 			goto done;
135*4887Schin 		else
136*4887Schin 		{
137*4887Schin  spanned:
138*4887Schin 			if ((index += exactlen) >= mid)
139*4887Schin 				goto impossible;
140*4887Schin 		}
141*4887Schin 	}
142*4887Schin  done:
143*4887Schin 	env->rex = e;
144*4887Schin 	return n;
145*4887Schin }
146