14887Schin /***********************************************************************
24887Schin * *
34887Schin * This software is part of the ast package *
4*12068SRoger.Faulkner@Oracle.COM * Copyright (c) 1985-2010 AT&T Intellectual Property *
54887Schin * and is licensed under the *
64887Schin * Common Public License, Version 1.0 *
78462SApril.Chin@Sun.COM * by AT&T Intellectual Property *
84887Schin * *
94887Schin * A copy of the License is available at *
104887Schin * http://www.opensource.org/licenses/cpl1.0.txt *
114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
124887Schin * *
134887Schin * Information and Software Systems Research *
144887Schin * AT&T Research *
154887Schin * Florham Park NJ *
164887Schin * *
174887Schin * Glenn Fowler <gsf@research.att.com> *
184887Schin * David Korn <dgk@research.att.com> *
194887Schin * Phong Vo <kpv@research.att.com> *
204887Schin * *
214887Schin ***********************************************************************/
224887Schin #pragma prototyped
234887Schin
244887Schin /*
254887Schin * posix regex record executor
264887Schin * multiple record sized-buffer interface
274887Schin */
284887Schin
294887Schin #include "reglib.h"
304887Schin
314887Schin /*
324887Schin * call regnexec() on records selected by Boyer-Moore
334887Schin */
344887Schin
/*
 * regrexec: treat the len-byte buffer s as a sequence of sep-delimited
 * records and hand selected records to the record callback.
 *
 * Candidate positions are found with the Boyer-Moore tables stored in
 * the compiled pattern (e->re.bm.skip / fail / mask).  The record that
 * surrounds a candidate is then confirmed with regnexec(), unless
 * `complete' is set, in which case the candidate is accepted as is.
 *
 * p		compiled pattern; p->env->rex must have type REX_BM
 * s, len	buffer to scan and its size in bytes
 * nmatch,
 * match	forwarded unchanged to regnexec()
 * flags	forwarded to regnexec(); REG_INVERT additionally makes
 *		this function report the records that were not selected
 * sep		byte value that separates records
 * handle	opaque value passed as first argument of record
 * record	called as (*record)(handle, start, length); a non-zero
 *		return stops the scan and becomes the return value
 *
 * Returns REG_BADPAT when s, p or p->env is null or the pattern is not
 * REX_BM.  Otherwise returns 0 when the scan runs to completion, the
 * first non-zero value returned by record, or a regnexec() result that
 * is neither 0 nor REG_NOMATCH.
 *
 * env->rex is reassigned before each regnexec() call and is restored to
 * the value it had on entry at `done'.
 *
 * NOTE(review): the digit+author token that starts every line, and the
 * extra copy of the signature fused into the regrexec line, look like
 * source-browser annotation residue rather than C -- confirm and strip
 * before compiling.
 */
354887Schin int
regrexec(const regex_t * p,const char * s,size_t len,size_t nmatch,regmatch_t * match,regflags_t flags,int sep,void * handle,regrecord_t record)364887Schin regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record)
374887Schin {
384887Schin register unsigned char* buf = (unsigned char*)s;
394887Schin register unsigned char* beg;
404887Schin register unsigned char* l;
414887Schin register unsigned char* r;
424887Schin register unsigned char* x;
434887Schin register size_t* skip;
444887Schin register size_t* fail;
454887Schin register Bm_mask_t** mask;
464887Schin register size_t index;
474887Schin register int n;
484887Schin unsigned char* end;
494887Schin size_t mid;
504887Schin int complete;
514887Schin int exactlen;
524887Schin int leftlen;
534887Schin int rightlen;
544887Schin int inv;
554887Schin Bm_mask_t m;
564887Schin Env_t* env;
574887Schin Rex_t* e;
584887Schin
/*
 * Only patterns whose top-level rex is REX_BM are accepted.
 * NOTE(review): env->rex itself is dereferenced without a null check.
 */
594887Schin if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM)
604887Schin return REG_BADPAT;
614887Schin inv = (flags & REG_INVERT) != 0;
/* beg marks the start of the text not yet reported or skipped */
624887Schin buf = beg = (unsigned char*)s;
634887Schin end = buf + len;
/* mid = len - bm.right, clamped at 0; the skip loop below stops there */
644887Schin mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right);
654887Schin skip = e->re.bm.skip;
664887Schin fail = e->re.bm.fail;
674887Schin mask = e->re.bm.mask;
/* when set, the regnexec() confirmation below is short-circuited */
684887Schin complete = e->re.bm.complete && !nmatch;
694887Schin exactlen = e->re.bm.size;
704887Schin leftlen = e->re.bm.left + exactlen;
714887Schin rightlen = exactlen + e->re.bm.right;
/* index starts at bm.left + bm.size; leftlen is one larger from here on */
724887Schin index = leftlen++;
734887Schin for (;;)
744887Schin {
/* empty-bodied loop: keep adding skip[] shifts until index reaches mid */
754887Schin while ((index += skip[buf[index]]) < mid);
/*
 * NOTE(review): HIT is defined outside this file; presumably skip[]
 * adds HIT for a byte that can end the pattern, so a value below HIT
 * here means the buffer ran out without a candidate -- confirm.
 */
764887Schin if (index < HIT)
774887Schin goto impossible;
784887Schin index -= HIT;
/*
 * Check the candidate right to left: m accumulates the and of the
 * per-position masks, n counts down the remaining pattern positions
 * and index steps back one byte per position.  n reaching 0 with m
 * still non-zero means every position agreed.
 */
794887Schin m = mask[n = exactlen - 1][buf[index]];
804887Schin do
814887Schin {
824887Schin if (!n--)
834887Schin goto possible;
844887Schin } while (m &= mask[n][buf[--index]]);
/* mismatch: shift by fail[] and rescan if still inside the buffer */
854887Schin if ((index += fail[n + 1]) < len)
864887Schin continue;
/* no further candidate can exist in the buffer */
874887Schin impossible:
884887Schin if (inv)
894887Schin {
/* inverted: report every record from beg up to the end of the buffer */
904887Schin l = r = buf + len;
914887Schin goto invert;
924887Schin }
934887Schin n = 0;
944887Schin goto done;
/* buf + index is now the first byte of an exactlen-byte candidate */
954887Schin possible:
964887Schin r = (l = buf + index) + exactlen;
/* move l back to the start of the enclosing record */
974887Schin while (l > beg)
984887Schin if (*--l == sep)
994887Schin {
1004887Schin l++;
1014887Schin break;
1024887Schin }
/* too little text between the record start and the candidate end */
1034887Schin if ((r - l) < leftlen)
1044887Schin goto spanned;
/* move r forward to the record's separator or the end of the buffer */
1054887Schin while (r < end && *r != sep)
1064887Schin r++;
/* too little text between the candidate start and the record end */
1074887Schin if ((r - (buf + index)) < rightlen)
1084887Schin goto spanned;
/*
 * Confirm the record [l, r).  env->rex is set to e for records longer
 * than 128 bytes and to e->next otherwise before regnexec() runs.
 * NOTE(review): if the chosen rex is null the && short-circuits and n
 * still holds what the mask loop left in it -- confirm that cannot
 * happen.
 */
1094887Schin if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags)))
1104887Schin {
1114887Schin if (inv)
1124887Schin {
/*
 * Also entered from `impossible'.  Reports each sep-delimited record
 * in [beg, l), i.e. the records that precede the selected one.
 */
1134887Schin invert:
1144887Schin x = beg;
1154887Schin while (beg < l)
1164887Schin {
1174887Schin while (x < l && *x != sep)
1184887Schin x++;
1194887Schin if (n = (*record)(handle, (char*)beg, x - beg))
1204887Schin goto done;
1214887Schin beg = ++x;
1224887Schin }
1234887Schin }
/* normal mode: report the selected record without its separator */
1244887Schin else if (n = (*record)(handle, (char*)l, r - l))
1254887Schin goto done;
/* resume searching leftlen bytes past the end of this record */
1264887Schin if ((index = (r - buf) + leftlen) >= len)
1274887Schin {
/* inverted: whatever follows the record is reported in a single call */
1284887Schin n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0;
1294887Schin goto done;
1304887Schin }
1314887Schin beg = r + 1;
1324887Schin }
/* a result other than REG_NOMATCH is returned to the caller as is */
1334887Schin else if (n != REG_NOMATCH)
1344887Schin goto done;
1354887Schin else
1364887Schin {
/* candidate rejected: step past it and keep scanning while below mid */
1374887Schin spanned:
1384887Schin if ((index += exactlen) >= mid)
1394887Schin goto impossible;
1404887Schin }
1414887Schin }
/* single exit after validation: put back the rex saved in e on entry */
1424887Schin done:
1434887Schin env->rex = e;
1444887Schin return n;
1454887Schin }
146