14887Schin /*********************************************************************** 24887Schin * * 34887Schin * This software is part of the ast package * 4*12068SRoger.Faulkner@Oracle.COM * Copyright (c) 1985-2010 AT&T Intellectual Property * 54887Schin * and is licensed under the * 64887Schin * Common Public License, Version 1.0 * 78462SApril.Chin@Sun.COM * by AT&T Intellectual Property * 84887Schin * * 94887Schin * A copy of the License is available at * 104887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 124887Schin * * 134887Schin * Information and Software Systems Research * 144887Schin * AT&T Research * 154887Schin * Florham Park NJ * 164887Schin * * 174887Schin * Glenn Fowler <gsf@research.att.com> * 184887Schin * David Korn <dgk@research.att.com> * 194887Schin * Phong Vo <kpv@research.att.com> * 204887Schin * * 214887Schin ***********************************************************************/ 224887Schin #pragma prototyped 234887Schin 244887Schin /* 254887Schin * posix regex record executor 264887Schin * multiple record sized-buffer interface 274887Schin */ 284887Schin 294887Schin #include "reglib.h" 304887Schin 314887Schin /* 324887Schin * call regnexec() on records selected by Boyer-Moore 334887Schin */ 344887Schin 354887Schin int 364887Schin regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record) 374887Schin { 384887Schin register unsigned char* buf = (unsigned char*)s; 394887Schin register unsigned char* beg; 404887Schin register unsigned char* l; 414887Schin register unsigned char* r; 424887Schin register unsigned char* x; 434887Schin register size_t* skip; 444887Schin register size_t* fail; 454887Schin register Bm_mask_t** mask; 464887Schin register size_t index; 474887Schin register int n; 484887Schin unsigned char* end; 494887Schin size_t mid; 504887Schin int complete; 514887Schin int exactlen; 524887Schin int leftlen; 534887Schin int rightlen; 544887Schin int inv; 554887Schin Bm_mask_t m; 564887Schin Env_t* env; 574887Schin Rex_t* e; 584887Schin 594887Schin if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM) 604887Schin return REG_BADPAT; 614887Schin inv = (flags & REG_INVERT) != 0; 624887Schin buf = beg = (unsigned char*)s; 634887Schin end = buf + len; 644887Schin mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right); 654887Schin skip = e->re.bm.skip; 664887Schin fail = e->re.bm.fail; 674887Schin mask = e->re.bm.mask; 684887Schin complete = e->re.bm.complete && !nmatch; 694887Schin exactlen = e->re.bm.size; 704887Schin leftlen = e->re.bm.left + exactlen; 714887Schin rightlen = exactlen + e->re.bm.right; 724887Schin index = leftlen++; 734887Schin for (;;) 744887Schin { 754887Schin while ((index += skip[buf[index]]) < mid); 764887Schin if (index < HIT) 774887Schin goto impossible; 784887Schin index -= HIT; 794887Schin m = mask[n = exactlen - 1][buf[index]]; 804887Schin do 814887Schin { 824887Schin if (!n--) 834887Schin goto possible; 844887Schin } while (m &= mask[n][buf[--index]]); 854887Schin if ((index += fail[n + 1]) < len) 864887Schin continue; 874887Schin impossible: 884887Schin if (inv) 894887Schin { 904887Schin l = r = buf + len; 914887Schin goto invert; 924887Schin } 934887Schin n = 0; 944887Schin goto done; 954887Schin possible: 964887Schin r = (l = buf + index) + exactlen; 974887Schin while (l > beg) 984887Schin if (*--l == sep) 994887Schin { 1004887Schin l++; 1014887Schin break; 1024887Schin } 1034887Schin if ((r - l) < leftlen) 1044887Schin goto spanned; 1054887Schin while (r < end && *r != sep) 1064887Schin r++; 1074887Schin if ((r - (buf + index)) < rightlen) 1084887Schin goto spanned; 1094887Schin if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags))) 1104887Schin { 1114887Schin if (inv) 1124887Schin { 1134887Schin invert: 1144887Schin x = beg; 1154887Schin while (beg < l) 1164887Schin { 1174887Schin while (x < l && *x != sep) 1184887Schin x++; 1194887Schin if (n = (*record)(handle, (char*)beg, x - beg)) 1204887Schin goto done; 1214887Schin beg = ++x; 1224887Schin } 1234887Schin } 1244887Schin else if (n = (*record)(handle, (char*)l, r - l)) 1254887Schin goto done; 1264887Schin if ((index = (r - buf) + leftlen) >= len) 1274887Schin { 1284887Schin n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0; 1294887Schin goto done; 1304887Schin } 1314887Schin beg = r + 1; 1324887Schin } 1334887Schin else if (n != REG_NOMATCH) 1344887Schin goto done; 1354887Schin else 1364887Schin { 1374887Schin spanned: 1384887Schin if ((index += exactlen) >= mid) 1394887Schin goto impossible; 1404887Schin } 1414887Schin } 1424887Schin done: 1434887Schin env->rex = e; 1444887Schin return n; 1454887Schin } 146