xref: /onnv-gate/usr/src/lib/libast/common/regex/regrexec.c (revision 12068:08a39a083754)
/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1985-2010 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                   Phong Vo <kpv@research.att.com>                    *
*                                                                      *
***********************************************************************/
224887Schin #pragma prototyped
234887Schin 
/*
 * posix regex record executor
 * multiple record sized-buffer interface
 */
284887Schin 
294887Schin #include "reglib.h"
304887Schin 
314887Schin /*
324887Schin  * call regnexec() on records selected by Boyer-Moore
334887Schin  */
344887Schin 
354887Schin int
regrexec(const regex_t * p,const char * s,size_t len,size_t nmatch,regmatch_t * match,regflags_t flags,int sep,void * handle,regrecord_t record)364887Schin regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record)
374887Schin {
384887Schin 	register unsigned char*	buf = (unsigned char*)s;
394887Schin 	register unsigned char*	beg;
404887Schin 	register unsigned char*	l;
414887Schin 	register unsigned char*	r;
424887Schin 	register unsigned char*	x;
434887Schin 	register size_t*	skip;
444887Schin 	register size_t*	fail;
454887Schin 	register Bm_mask_t**	mask;
464887Schin 	register size_t		index;
474887Schin 	register int		n;
484887Schin 	unsigned char*		end;
494887Schin 	size_t			mid;
504887Schin 	int			complete;
514887Schin 	int			exactlen;
524887Schin 	int			leftlen;
534887Schin 	int			rightlen;
544887Schin 	int			inv;
554887Schin 	Bm_mask_t		m;
564887Schin 	Env_t*			env;
574887Schin 	Rex_t*			e;
584887Schin 
594887Schin 	if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM)
604887Schin 		return REG_BADPAT;
614887Schin 	inv = (flags & REG_INVERT) != 0;
624887Schin 	buf = beg = (unsigned char*)s;
634887Schin 	end = buf + len;
644887Schin 	mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right);
654887Schin 	skip = e->re.bm.skip;
664887Schin 	fail = e->re.bm.fail;
674887Schin 	mask = e->re.bm.mask;
684887Schin 	complete = e->re.bm.complete && !nmatch;
694887Schin 	exactlen = e->re.bm.size;
704887Schin 	leftlen = e->re.bm.left + exactlen;
714887Schin 	rightlen = exactlen + e->re.bm.right;
724887Schin 	index = leftlen++;
734887Schin 	for (;;)
744887Schin 	{
754887Schin 		while ((index += skip[buf[index]]) < mid);
764887Schin 		if (index < HIT)
774887Schin 			goto impossible;
784887Schin 		index -= HIT;
794887Schin 		m = mask[n = exactlen - 1][buf[index]];
804887Schin 		do
814887Schin 		{
824887Schin 			if (!n--)
834887Schin 				goto possible;
844887Schin 		} while (m &= mask[n][buf[--index]]);
854887Schin 		if ((index += fail[n + 1]) < len)
864887Schin 			continue;
874887Schin  impossible:
884887Schin 		if (inv)
894887Schin 		{
904887Schin 			l = r = buf + len;
914887Schin 			goto invert;
924887Schin 		}
934887Schin 		n = 0;
944887Schin 		goto done;
954887Schin  possible:
964887Schin 		r = (l = buf + index) + exactlen;
974887Schin 		while (l > beg)
984887Schin 			if (*--l == sep)
994887Schin 			{
1004887Schin 				l++;
1014887Schin 				break;
1024887Schin 			}
1034887Schin 		if ((r - l) < leftlen)
1044887Schin 			goto spanned;
1054887Schin 		while (r < end && *r != sep)
1064887Schin 			r++;
1074887Schin 		if ((r - (buf + index)) < rightlen)
1084887Schin 			goto spanned;
1094887Schin 		if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags)))
1104887Schin 		{
1114887Schin 			if (inv)
1124887Schin 			{
1134887Schin  invert:
1144887Schin 				x = beg;
1154887Schin 				while (beg < l)
1164887Schin 				{
1174887Schin 					while (x < l && *x != sep)
1184887Schin 						x++;
1194887Schin 					if (n = (*record)(handle, (char*)beg, x - beg))
1204887Schin 						goto done;
1214887Schin 					beg = ++x;
1224887Schin 				}
1234887Schin 			}
1244887Schin 			else if (n = (*record)(handle, (char*)l, r - l))
1254887Schin 				goto done;
1264887Schin 			if ((index = (r - buf) + leftlen) >= len)
1274887Schin 			{
1284887Schin 				n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0;
1294887Schin 				goto done;
1304887Schin 			}
1314887Schin 			beg = r + 1;
1324887Schin 		}
1334887Schin 		else if (n != REG_NOMATCH)
1344887Schin 			goto done;
1354887Schin 		else
1364887Schin 		{
1374887Schin  spanned:
1384887Schin 			if ((index += exactlen) >= mid)
1394887Schin 				goto impossible;
1404887Schin 		}
1414887Schin 	}
1424887Schin  done:
1434887Schin 	env->rex = e;
1444887Schin 	return n;
1454887Schin }
146