1*4887Schin /*********************************************************************** 2*4887Schin * * 3*4887Schin * This software is part of the ast package * 4*4887Schin * Copyright (c) 1985-2007 AT&T Knowledge Ventures * 5*4887Schin * and is licensed under the * 6*4887Schin * Common Public License, Version 1.0 * 7*4887Schin * by AT&T Knowledge Ventures * 8*4887Schin * * 9*4887Schin * A copy of the License is available at * 10*4887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 11*4887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12*4887Schin * * 13*4887Schin * Information and Software Systems Research * 14*4887Schin * AT&T Research * 15*4887Schin * Florham Park NJ * 16*4887Schin * * 17*4887Schin * Glenn Fowler <gsf@research.att.com> * 18*4887Schin * David Korn <dgk@research.att.com> * 19*4887Schin * Phong Vo <kpv@research.att.com> * 20*4887Schin * * 21*4887Schin ***********************************************************************/ 22*4887Schin #pragma prototyped 23*4887Schin 24*4887Schin /* 25*4887Schin * posix regex record executor 26*4887Schin * multiple record sized-buffer interface 27*4887Schin */ 28*4887Schin 29*4887Schin #include "reglib.h" 30*4887Schin 31*4887Schin /* 32*4887Schin * call regnexec() on records selected by Boyer-Moore 33*4887Schin */ 34*4887Schin 35*4887Schin int 36*4887Schin regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record) 37*4887Schin { 38*4887Schin register unsigned char* buf = (unsigned char*)s; 39*4887Schin register unsigned char* beg; 40*4887Schin register unsigned char* l; 41*4887Schin register unsigned char* r; 42*4887Schin register unsigned char* x; 43*4887Schin register size_t* skip; 44*4887Schin register size_t* fail; 45*4887Schin register Bm_mask_t** mask; 46*4887Schin register size_t index; 47*4887Schin register int n; 48*4887Schin unsigned char* end; 49*4887Schin size_t mid; 50*4887Schin int complete; 51*4887Schin int exactlen; 52*4887Schin int leftlen; 53*4887Schin int rightlen; 54*4887Schin int inv; 55*4887Schin Bm_mask_t m; 56*4887Schin Env_t* env; 57*4887Schin Rex_t* e; 58*4887Schin 59*4887Schin if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM) 60*4887Schin return REG_BADPAT; 61*4887Schin inv = (flags & REG_INVERT) != 0; 62*4887Schin buf = beg = (unsigned char*)s; 63*4887Schin end = buf + len; 64*4887Schin mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right); 65*4887Schin skip = e->re.bm.skip; 66*4887Schin fail = e->re.bm.fail; 67*4887Schin mask = e->re.bm.mask; 68*4887Schin complete = e->re.bm.complete && !nmatch; 69*4887Schin exactlen = e->re.bm.size; 70*4887Schin leftlen = e->re.bm.left + exactlen; 71*4887Schin rightlen = exactlen + e->re.bm.right; 72*4887Schin index = leftlen++; 73*4887Schin for (;;) 74*4887Schin { 75*4887Schin while ((index += skip[buf[index]]) < mid); 76*4887Schin if (index < HIT) 77*4887Schin goto impossible; 78*4887Schin index -= HIT; 79*4887Schin m = mask[n = exactlen - 1][buf[index]]; 80*4887Schin do 81*4887Schin { 82*4887Schin if (!n--) 83*4887Schin goto possible; 84*4887Schin } while (m &= mask[n][buf[--index]]); 85*4887Schin if ((index += fail[n + 1]) < len) 86*4887Schin continue; 87*4887Schin impossible: 88*4887Schin if (inv) 89*4887Schin { 90*4887Schin l = r = buf + len; 91*4887Schin goto invert; 92*4887Schin } 93*4887Schin n = 0; 94*4887Schin goto done; 95*4887Schin possible: 96*4887Schin r = (l = buf + index) + exactlen; 97*4887Schin while (l > beg) 98*4887Schin if (*--l == sep) 99*4887Schin { 100*4887Schin l++; 101*4887Schin break; 102*4887Schin } 103*4887Schin if ((r - l) < leftlen) 104*4887Schin goto spanned; 105*4887Schin while (r < end && *r != sep) 106*4887Schin r++; 107*4887Schin if ((r - (buf + index)) < rightlen) 108*4887Schin goto spanned; 109*4887Schin if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags))) 110*4887Schin { 111*4887Schin if (inv) 112*4887Schin { 113*4887Schin invert: 114*4887Schin x = beg; 115*4887Schin while (beg < l) 116*4887Schin { 117*4887Schin while (x < l && *x != sep) 118*4887Schin x++; 119*4887Schin if (n = (*record)(handle, (char*)beg, x - beg)) 120*4887Schin goto done; 121*4887Schin beg = ++x; 122*4887Schin } 123*4887Schin } 124*4887Schin else if (n = (*record)(handle, (char*)l, r - l)) 125*4887Schin goto done; 126*4887Schin if ((index = (r - buf) + leftlen) >= len) 127*4887Schin { 128*4887Schin n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0; 129*4887Schin goto done; 130*4887Schin } 131*4887Schin beg = r + 1; 132*4887Schin } 133*4887Schin else if (n != REG_NOMATCH) 134*4887Schin goto done; 135*4887Schin else 136*4887Schin { 137*4887Schin spanned: 138*4887Schin if ((index += exactlen) >= mid) 139*4887Schin goto impossible; 140*4887Schin } 141*4887Schin } 142*4887Schin done: 143*4887Schin env->rex = e; 144*4887Schin return n; 145*4887Schin } 146