14887Schin /***********************************************************************
24887Schin * *
34887Schin * This software is part of the ast package *
4*12068SRoger.Faulkner@Oracle.COM * Copyright (c) 1985-2010 AT&T Intellectual Property *
54887Schin * and is licensed under the *
64887Schin * Common Public License, Version 1.0 *
78462SApril.Chin@Sun.COM * by AT&T Intellectual Property *
84887Schin * *
94887Schin * A copy of the License is available at *
104887Schin * http://www.opensource.org/licenses/cpl1.0.txt *
114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
124887Schin * *
134887Schin * Information and Software Systems Research *
144887Schin * AT&T Research *
154887Schin * Florham Park NJ *
164887Schin * *
174887Schin * Glenn Fowler <gsf@research.att.com> *
184887Schin * David Korn <dgk@research.att.com> *
194887Schin * Phong Vo <kpv@research.att.com> *
204887Schin * *
214887Schin ***********************************************************************/
224887Schin #pragma prototyped
234887Schin
244887Schin /*
254887Schin * posix regex executor
264887Schin * single sized-record interface
274887Schin */
284887Schin
294887Schin #include "reglib.h"
304887Schin
314887Schin #if _AST_REGEX_DEBUG
324887Schin
334887Schin #define DEBUG_TEST(f,y,n) ((debug&(debug_flag=f))?(y):(n))
344887Schin #define DEBUG_CODE(f,y,n) do if(debug&(f)){y}else{n} while(0)
354887Schin #define DEBUG_INIT() do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_exec_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
364887Schin
374887Schin static unsigned long debug;
384887Schin static unsigned long debug_flag;
394887Schin
404887Schin static const char* rexnames[] =
414887Schin {
424887Schin "REX_NULL",
434887Schin "REX_ALT",
444887Schin "REX_ALT_CATCH",
454887Schin "REX_BACK",
464887Schin "REX_BEG",
474887Schin "REX_BEG_STR",
484887Schin "REX_BM",
494887Schin "REX_CAT",
504887Schin "REX_CLASS",
514887Schin "REX_COLL_CLASS",
524887Schin "REX_CONJ",
534887Schin "REX_CONJ_LEFT",
544887Schin "REX_CONJ_RIGHT",
554887Schin "REX_DONE",
564887Schin "REX_DOT",
574887Schin "REX_END",
584887Schin "REX_END_STR",
594887Schin "REX_EXEC",
604887Schin "REX_FIN_STR",
614887Schin "REX_GROUP",
624887Schin "REX_GROUP_CATCH",
634887Schin "REX_GROUP_AHEAD",
644887Schin "REX_GROUP_AHEAD_CATCH",
654887Schin "REX_GROUP_AHEAD_NOT",
664887Schin "REX_GROUP_BEHIND",
674887Schin "REX_GROUP_BEHIND_CATCH",
684887Schin "REX_GROUP_BEHIND_NOT",
694887Schin "REX_GROUP_BEHIND_NOT_CATCH",
704887Schin "REX_GROUP_COND",
714887Schin "REX_GROUP_COND_CATCH",
724887Schin "REX_GROUP_CUT",
734887Schin "REX_GROUP_CUT_CATCH",
744887Schin "REX_KMP",
754887Schin "REX_NEG",
764887Schin "REX_NEG_CATCH",
774887Schin "REX_NEST",
784887Schin "REX_ONECHAR",
794887Schin "REX_REP",
804887Schin "REX_REP_CATCH",
814887Schin "REX_STRING",
824887Schin "REX_TRIE",
834887Schin "REX_WBEG",
844887Schin "REX_WEND",
854887Schin "REX_WORD",
864887Schin "REX_WORD_NOT"
874887Schin };
884887Schin
rexname(Rex_t * rex)894887Schin static const char* rexname(Rex_t* rex)
904887Schin {
914887Schin if (!rex)
924887Schin return "NIL";
934887Schin if (rex->type >= elementsof(rexnames))
944887Schin return "ERROR";
954887Schin return rexnames[rex->type];
964887Schin }
974887Schin
984887Schin #else
994887Schin
1004887Schin #define DEBUG_INIT()
1014887Schin #define DEBUG_TEST(f,y,n) (n)
1024887Schin #define DEBUG_CODE(f,y,n) do {n} while(0)
1034887Schin
1044887Schin #endif
1054887Schin
1064887Schin #define BEG_ALT 1 /* beginning of an alt */
1074887Schin #define BEG_ONE 2 /* beginning of one iteration of a rep */
1084887Schin #define BEG_REP 3 /* beginning of a repetition */
1094887Schin #define BEG_SUB 4 /* beginning of a subexpression */
1104887Schin #define END_ANY 5 /* end of any of above */
1114887Schin
1124887Schin /*
1134887Schin * returns from parse()
1144887Schin */
1154887Schin
1164887Schin #define NONE 0 /* no parse found */
1174887Schin #define GOOD 1 /* some parse was found */
1184887Schin #define CUT 2 /* no match and no backtrack */
1194887Schin #define BEST 3 /* an unbeatable parse was found */
1204887Schin #define BAD 4 /* error ocurred */
1214887Schin
1224887Schin /*
1234887Schin * REG_SHELL_DOT test
1244887Schin */
1254887Schin
1264887Schin #define LEADING(e,r,s) (*(s)==(e)->leading&&((s)==(e)->beg||*((s)-1)==(r)->explicit))
1274887Schin
1284887Schin /*
1294887Schin * Pos_t is for comparing parses. An entry is made in the
1304887Schin * array at the beginning and at the end of each Group_t,
1314887Schin * each iteration in a Group_t, and each Binary_t.
1324887Schin */
1334887Schin
1344887Schin typedef struct
1354887Schin {
1364887Schin unsigned char* p; /* where in string */
1374887Schin size_t length; /* length in string */
1384887Schin short serial; /* preorder subpattern number */
1394887Schin short be; /* which end of pair */
1404887Schin } Pos_t;
1414887Schin
1424887Schin /* ===== begin library support ===== */
1434887Schin
1444887Schin #define vector(t,v,i) (((i)<(v)->max)?(t*)((v)->vec+(i)*(v)->siz):(t*)vecseek(&(v),i))
1454887Schin
1464887Schin static Vector_t*
vecopen(int inc,int siz)1474887Schin vecopen(int inc, int siz)
1484887Schin {
1494887Schin Vector_t* v;
1504887Schin Stk_t* sp;
1514887Schin
1524887Schin if (inc <= 0)
1534887Schin inc = 16;
1544887Schin if (!(sp = stkopen(STK_SMALL|STK_NULL)))
1554887Schin return 0;
1564887Schin if (!(v = (Vector_t*)stkseek(sp, sizeof(Vector_t) + inc * siz)))
1574887Schin {
1584887Schin stkclose(sp);
1594887Schin return 0;
1604887Schin }
1614887Schin v->stk = sp;
1624887Schin v->vec = (char*)v + sizeof(Vector_t);
1634887Schin v->max = v->inc = inc;
1644887Schin v->siz = siz;
1654887Schin v->cur = 0;
1664887Schin return v;
1674887Schin }
1684887Schin
1694887Schin static void*
vecseek(Vector_t ** p,int index)1704887Schin vecseek(Vector_t** p, int index)
1714887Schin {
1724887Schin Vector_t* v = *p;
1734887Schin
1744887Schin if (index >= v->max)
1754887Schin {
1764887Schin while ((v->max += v->inc) <= index);
1774887Schin if (!(v = (Vector_t*)stkseek(v->stk, sizeof(Vector_t) + v->max * v->siz)))
1784887Schin return 0;
1794887Schin *p = v;
1804887Schin v->vec = (char*)v + sizeof(Vector_t);
1814887Schin }
1824887Schin return v->vec + index * v->siz;
1834887Schin }
1844887Schin
1854887Schin static void
vecclose(Vector_t * v)1864887Schin vecclose(Vector_t* v)
1874887Schin {
1884887Schin if (v)
1894887Schin stkclose(v->stk);
1904887Schin }
1914887Schin
1924887Schin typedef struct
1934887Schin {
1944887Schin Stk_pos_t pos;
1954887Schin char data[1];
1964887Schin } Stk_frame_t;
1974887Schin
1984887Schin #define stknew(s,p) ((p)->offset=stktell(s),(p)->base=stkfreeze(s,0))
1994887Schin #define stkold(s,p) stkset(s,(p)->base,(p)->offset)
2004887Schin
2014887Schin #define stkframe(s) (*((Stk_frame_t**)stktop(s)-1))
2024887Schin #define stkdata(s,t) ((t*)stkframe(s)->data)
2034887Schin #define stkpop(s) stkold(s,&(stkframe(s)->pos))
2044887Schin
2054887Schin static void*
stkpush(Stk_t * sp,size_t size)2064887Schin stkpush(Stk_t* sp, size_t size)
2074887Schin {
2084887Schin Stk_frame_t* f;
2094887Schin Stk_pos_t p;
2104887Schin
2114887Schin stknew(sp, &p);
2124887Schin size = sizeof(Stk_frame_t) + sizeof(size_t) + size - 1;
2134887Schin if (!(f = (Stk_frame_t*)stkalloc(sp, sizeof(Stk_frame_t) + sizeof(Stk_frame_t*) + size - 1)))
2144887Schin return 0;
2154887Schin f->pos = p;
2164887Schin stkframe(sp) = f;
2174887Schin return f->data;
2184887Schin }
2194887Schin
2204887Schin /* ===== end library support ===== */
2214887Schin
2224887Schin /*
2234887Schin * Match_frame_t is for saving and restoring match records
2244887Schin * around alternate attempts, so that fossils will not be
2254887Schin * left in the match array. These are the only entries in
2264887Schin * the match array that are not otherwise guaranteed to
2274887Schin * have current data in them when they get used.
2284887Schin */
2294887Schin
2304887Schin typedef struct
2314887Schin {
2324887Schin size_t size;
2334887Schin regmatch_t* match;
2344887Schin regmatch_t save[1];
2354887Schin } Match_frame_t;
2364887Schin
2374887Schin #define matchframe stkdata(stkstd,Match_frame_t)
2384887Schin #define matchpush(e,x) ((x)->re.group.number?_matchpush(e,x):0)
2394887Schin #define matchcopy(e,x) ((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size):(void*)0)
2404887Schin #define matchpop(e,x) ((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size),stkpop(stkstd):(void*)0)
2414887Schin
2424887Schin #define pospop(e) (--(e)->pos->cur)
2434887Schin
2444887Schin /*
2454887Schin * allocate a frame and push a match onto the stack
2464887Schin */
2474887Schin
2484887Schin static int
_matchpush(Env_t * env,Rex_t * rex)2494887Schin _matchpush(Env_t* env, Rex_t* rex)
2504887Schin {
2514887Schin Match_frame_t* f;
2524887Schin regmatch_t* m;
2534887Schin regmatch_t* e;
2544887Schin regmatch_t* s;
2554887Schin int num;
2564887Schin
2574887Schin if (rex->re.group.number <= 0 || (num = rex->re.group.last - rex->re.group.number + 1) <= 0)
2584887Schin num = 0;
2594887Schin if (!(f = (Match_frame_t*)stkpush(stkstd, sizeof(Match_frame_t) + (num - 1) * sizeof(regmatch_t))))
2604887Schin {
2614887Schin env->error = REG_ESPACE;
2624887Schin return 1;
2634887Schin }
2644887Schin f->size = num * sizeof(regmatch_t);
2654887Schin f->match = m = env->match + rex->re.group.number;
2664887Schin e = m + num;
2674887Schin s = f->save;
2684887Schin while (m < e)
2694887Schin {
2704887Schin *s++ = *m;
2714887Schin *m++ = state.nomatch;
2724887Schin }
2734887Schin return 0;
2744887Schin }
2754887Schin
2764887Schin /*
2774887Schin * allocate a frame and push a pos onto the stack
2784887Schin */
2794887Schin
2804887Schin static int
pospush(Env_t * env,Rex_t * rex,unsigned char * p,int be)2814887Schin pospush(Env_t* env, Rex_t* rex, unsigned char* p, int be)
2824887Schin {
2834887Schin Pos_t* pos;
2844887Schin
2854887Schin if (!(pos = vector(Pos_t, env->pos, env->pos->cur)))
2864887Schin {
2874887Schin env->error = REG_ESPACE;
2884887Schin return 1;
2894887Schin }
2904887Schin pos->serial = rex->serial;
2914887Schin pos->p = p;
2924887Schin pos->be = be;
2934887Schin env->pos->cur++;
2944887Schin return 0;
2954887Schin }
2964887Schin
2974887Schin /*
2984887Schin * two matches are known to have the same length
2994887Schin * os is start of old pos array, ns is start of new,
3004887Schin * oend and nend are end+1 pointers to ends of arrays.
3014887Schin * oe and ne are ends (not end+1) of subarrays.
3024887Schin * returns 1 if new is better, -1 if old, else 0.
3034887Schin */
3044887Schin
3054887Schin static int
better(Env_t * env,Pos_t * os,Pos_t * ns,Pos_t * oend,Pos_t * nend,int level)3064887Schin better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
3074887Schin {
3084887Schin Pos_t* oe;
3094887Schin Pos_t* ne;
3104887Schin int k;
3114887Schin int n;
3124887Schin
3134887Schin if (env->error)
3144887Schin return -1;
3154887Schin for (;;)
3164887Schin {
3174887Schin DEBUG_CODE(0x0080,{sfprintf(sfstdout, " %-*.*sold ", (level + 3) * 4, (level + 3) * 4, "");for (oe = os; oe < oend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n %-*.*snew ", (level + 3) * 4, (level + 3) * 4, "");for (oe = ns; oe < nend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n");},{0;});
3184887Schin if (ns >= nend)
3194887Schin return DEBUG_TEST(0x8000,(os < oend),(0));
3204887Schin if (os >= oend)
3214887Schin return DEBUG_TEST(0x8000,(-1),(1));
3224887Schin n = os->serial;
3234887Schin if (ns->serial > n)
3244887Schin return -1;
3254887Schin if (n > ns->serial)
3264887Schin {
3274887Schin env->error = REG_PANIC;
3284887Schin return -1;
3294887Schin }
3304887Schin if (ns->p > os->p)
3314887Schin return 1;
3324887Schin if (os->p > ns->p)
3334887Schin return -1;
3344887Schin oe = os;
3354887Schin k = 0;
3364887Schin for (;;)
3374887Schin if ((++oe)->serial == n)
3384887Schin {
3394887Schin if (oe->be != END_ANY)
3404887Schin k++;
3414887Schin else if (k-- <= 0)
3424887Schin break;
3434887Schin }
3444887Schin ne = ns;
3454887Schin k = 0;
3464887Schin for (;;)
3474887Schin if ((++ne)->serial == n)
3484887Schin {
3494887Schin if (ne->be != END_ANY)
3504887Schin k++;
3514887Schin else if (k-- <= 0)
3524887Schin break;
3534887Schin }
3544887Schin if (ne->p > oe->p)
3554887Schin return 1;
3564887Schin if (oe->p > ne->p)
3574887Schin return -1;
3584887Schin if (k = better(env, os + 1, ns + 1, oe, ne, level + 1))
3594887Schin return k;
3604887Schin os = oe + 1;
3614887Schin ns = ne + 1;
3624887Schin }
3634887Schin }
3644887Schin
3658462SApril.Chin@Sun.COM #if _AST_REGEX_DEBUG
3668462SApril.Chin@Sun.COM
3678462SApril.Chin@Sun.COM static void
showmatch(regmatch_t * p)3688462SApril.Chin@Sun.COM showmatch(regmatch_t* p)
3698462SApril.Chin@Sun.COM {
3708462SApril.Chin@Sun.COM sfputc(sfstdout, '(');
3718462SApril.Chin@Sun.COM if (p->rm_so < 0)
3728462SApril.Chin@Sun.COM sfputc(sfstdout, '?');
3738462SApril.Chin@Sun.COM else
3748462SApril.Chin@Sun.COM sfprintf(sfstdout, "%d", p->rm_so);
3758462SApril.Chin@Sun.COM sfputc(sfstdout, ',');
3768462SApril.Chin@Sun.COM if (p->rm_eo < 0)
3778462SApril.Chin@Sun.COM sfputc(sfstdout, '?');
3788462SApril.Chin@Sun.COM else
3798462SApril.Chin@Sun.COM sfprintf(sfstdout, "%d", p->rm_eo);
3808462SApril.Chin@Sun.COM sfputc(sfstdout, ')');
3818462SApril.Chin@Sun.COM }
3828462SApril.Chin@Sun.COM
3838462SApril.Chin@Sun.COM static int
_better(Env_t * env,Pos_t * os,Pos_t * ns,Pos_t * oend,Pos_t * nend,int level)3848462SApril.Chin@Sun.COM _better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
3858462SApril.Chin@Sun.COM {
3868462SApril.Chin@Sun.COM int i;
3878462SApril.Chin@Sun.COM
3888462SApril.Chin@Sun.COM DEBUG_CODE(0x0040,{sfprintf(sfstdout, "AHA better old ");for (i = 0; i <= env->nsub; i++)showmatch(&env->best[i]);sfprintf(sfstdout, "\n new ");for (i = 0; i <= env->nsub; i++)showmatch(&env->match[i]);sfprintf(sfstdout, "\n");},{0;});
3898462SApril.Chin@Sun.COM i = better(env, os, ns, oend, nend, 0);
3908462SApril.Chin@Sun.COM DEBUG_TEST(0x0040,(sfprintf(sfstdout, " %s\n", i <= 0 ? "OLD" : "NEW")),(0));
3918462SApril.Chin@Sun.COM return i;
3928462SApril.Chin@Sun.COM }
3938462SApril.Chin@Sun.COM
3948462SApril.Chin@Sun.COM #define better _better
3958462SApril.Chin@Sun.COM
3968462SApril.Chin@Sun.COM #endif
3974887Schin
3984887Schin #define follow(e,r,c,s) ((r)->next?parse(e,(r)->next,c,s):(c)?parse(e,c,0,s):BEST)
3994887Schin
4004887Schin static int parse(Env_t*, Rex_t*, Rex_t*, unsigned char*);
4014887Schin
4024887Schin static int
parserep(Env_t * env,Rex_t * rex,Rex_t * cont,unsigned char * s,int n)4034887Schin parserep(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s, int n)
4044887Schin {
4054887Schin int i;
4064887Schin int r = NONE;
4074887Schin Rex_t catcher;
4084887Schin
40910898Sroland.mainz@nrubsig.org DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep %s %d %d %d %d `%-.*s'\n", __LINE__, debug_flag, rexname(rex->re.group.expr.rex), rex->re.group.number, rex->lo, n, rex->hi, env->end - s, s)),(0));
4104887Schin if ((rex->flags & REG_MINIMAL) && n >= rex->lo && n < rex->hi)
4114887Schin {
4124887Schin if (env->stack && pospush(env, rex, s, END_ANY))
4134887Schin return BAD;
4144887Schin i = follow(env, rex, cont, s);
4154887Schin if (env->stack)
4164887Schin pospop(env);
4174887Schin switch (i)
4184887Schin {
4194887Schin case BAD:
4204887Schin return BAD;
4214887Schin case CUT:
4224887Schin return CUT;
4234887Schin case BEST:
4244887Schin case GOOD:
4254887Schin return BEST;
4264887Schin }
4274887Schin }
4284887Schin if (n < rex->hi)
4294887Schin {
4304887Schin catcher.type = REX_REP_CATCH;
4314887Schin catcher.serial = rex->serial;
4324887Schin catcher.re.rep_catch.ref = rex;
4334887Schin catcher.re.rep_catch.cont = cont;
4344887Schin catcher.re.rep_catch.beg = s;
4354887Schin catcher.re.rep_catch.n = n + 1;
4364887Schin catcher.next = rex->next;
4374887Schin if (n == 0)
4384887Schin rex->re.rep_catch.beg = s;
4394887Schin if (env->stack)
4404887Schin {
4414887Schin if (matchpush(env, rex))
4424887Schin return BAD;
4434887Schin if (pospush(env, rex, s, BEG_ONE))
4444887Schin return BAD;
44510898Sroland.mainz@nrubsig.org DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH %d (%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
4464887Schin }
4474887Schin r = parse(env, rex->re.group.expr.rex, &catcher, s);
44810898Sroland.mainz@nrubsig.org DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep parse %d %d `%-.*s'\n", __LINE__, debug_flag, rex->re.group.number, r, env->end - s, s)),(0));
4494887Schin if (env->stack)
4504887Schin {
4514887Schin pospop(env);
4524887Schin matchpop(env, rex);
45310898Sroland.mainz@nrubsig.org DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP %d %d (%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, r, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
4544887Schin }
4554887Schin switch (r)
4564887Schin {
4574887Schin case BAD:
4584887Schin return BAD;
4594887Schin case BEST:
4604887Schin return BEST;
4614887Schin case CUT:
4624887Schin r = NONE;
4634887Schin break;
4644887Schin case GOOD:
4654887Schin if (rex->flags & REG_MINIMAL)
4664887Schin return BEST;
4674887Schin r = GOOD;
4684887Schin break;
4694887Schin }
4704887Schin }
4714887Schin if (n < rex->lo)
4724887Schin return r;
4734887Schin if (!(rex->flags & REG_MINIMAL) || n >= rex->hi)
4744887Schin {
4754887Schin if (env->stack && pospush(env, rex, s, END_ANY))
4764887Schin return BAD;
4774887Schin i = follow(env, rex, cont, s);
4784887Schin if (env->stack)
4794887Schin pospop(env);
4804887Schin switch (i)
4814887Schin {
4824887Schin case BAD:
4834887Schin r = BAD;
4844887Schin break;
4854887Schin case CUT:
4864887Schin r = CUT;
4874887Schin break;
4884887Schin case BEST:
4894887Schin r = BEST;
4904887Schin break;
4914887Schin case GOOD:
4924887Schin r = (rex->flags & REG_MINIMAL) ? BEST : GOOD;
4934887Schin break;
4944887Schin }
4954887Schin }
4964887Schin return r;
4974887Schin }
4984887Schin
4994887Schin static int
parsetrie(Env_t * env,Trie_node_t * x,Rex_t * rex,Rex_t * cont,unsigned char * s)5004887Schin parsetrie(Env_t* env, Trie_node_t* x, Rex_t* rex, Rex_t* cont, unsigned char* s)
5014887Schin {
5024887Schin unsigned char* p;
5034887Schin int r;
5044887Schin
5054887Schin if (p = rex->map)
5064887Schin {
5074887Schin for (;;)
5084887Schin {
5094887Schin if (s >= env->end)
5104887Schin return NONE;
5114887Schin while (x->c != p[*s])
5124887Schin if (!(x = x->sib))
5134887Schin return NONE;
5144887Schin if (x->end)
5154887Schin break;
5164887Schin x = x->son;
5174887Schin s++;
5184887Schin }
5194887Schin }
5204887Schin else
5214887Schin {
5224887Schin for (;;)
5234887Schin {
5244887Schin if (s >= env->end)
5254887Schin return NONE;
5264887Schin while (x->c != *s)
5274887Schin if (!(x = x->sib))
5284887Schin return NONE;
5294887Schin if (x->end)
5304887Schin break;
5314887Schin x = x->son;
5324887Schin s++;
5334887Schin }
5344887Schin }
5354887Schin s++;
5364887Schin if (rex->flags & REG_MINIMAL)
5374887Schin switch (follow(env, rex, cont, s))
5384887Schin {
5394887Schin case BAD:
5404887Schin return BAD;
5414887Schin case CUT:
5424887Schin return CUT;
5434887Schin case BEST:
5444887Schin case GOOD:
5454887Schin return BEST;
5464887Schin }
5474887Schin if (x->son)
5484887Schin switch (parsetrie(env, x->son, rex, cont, s))
5494887Schin {
5504887Schin case BAD:
5514887Schin return BAD;
5524887Schin case CUT:
5534887Schin return CUT;
5544887Schin case BEST:
5554887Schin return BEST;
5564887Schin case GOOD:
5574887Schin if (rex->flags & REG_MINIMAL)
5584887Schin return BEST;
5594887Schin r = GOOD;
5604887Schin break;
5614887Schin default:
5624887Schin r = NONE;
5634887Schin break;
5644887Schin }
5654887Schin else
5664887Schin r = NONE;
5674887Schin if (!(rex->flags & REG_MINIMAL))
5684887Schin switch (follow(env, rex, cont, s))
5694887Schin {
5704887Schin case BAD:
5714887Schin return BAD;
5724887Schin case CUT:
5734887Schin return CUT;
5744887Schin case BEST:
5754887Schin return BEST;
5764887Schin case GOOD:
5774887Schin return GOOD;
5784887Schin }
5794887Schin return r;
5804887Schin }
5814887Schin
5824887Schin static int
collelt(register Celt_t * ce,char * key,int c,int x)5834887Schin collelt(register Celt_t* ce, char* key, int c, int x)
5844887Schin {
5854887Schin Ckey_t elt;
5864887Schin
5874887Schin mbxfrm(elt, key, COLL_KEY_MAX);
5884887Schin for (;; ce++)
5894887Schin {
5904887Schin switch (ce->typ)
5914887Schin {
5924887Schin case COLL_call:
5934887Schin if (!x && (*ce->fun)(c))
5944887Schin return 1;
5954887Schin continue;
5964887Schin case COLL_char:
5974887Schin if (!strcmp((char*)ce->beg, (char*)elt))
5984887Schin return 1;
5994887Schin continue;
6004887Schin case COLL_range:
6014887Schin if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max)
6024887Schin return 1;
6034887Schin continue;
6044887Schin case COLL_range_lc:
6054887Schin if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswlower(c) || !iswupper(c)))
6064887Schin return 1;
6074887Schin continue;
6084887Schin case COLL_range_uc:
6094887Schin if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswupper(c) || !iswlower(c)))
6104887Schin return 1;
6114887Schin continue;
6124887Schin }
6134887Schin break;
6144887Schin }
6154887Schin return 0;
6164887Schin }
6174887Schin
6184887Schin static int
collic(register Celt_t * ce,char * key,register char * nxt,int c,int x)6194887Schin collic(register Celt_t* ce, char* key, register char* nxt, int c, int x)
6204887Schin {
6214887Schin if (!x)
6224887Schin {
6234887Schin if (collelt(ce, key, c, x))
6244887Schin return 1;
6254887Schin if (iswlower(c))
6264887Schin c = towupper(c);
6274887Schin else if (iswupper(c))
6284887Schin c = towlower(c);
6294887Schin else
6304887Schin return 0;
631*12068SRoger.Faulkner@Oracle.COM x = mbconv(key, c);
6324887Schin key[x] = 0;
6334887Schin return collelt(ce, key, c, 0);
6344887Schin }
6354887Schin while (*nxt)
6364887Schin {
6374887Schin if (collic(ce, key, nxt + 1, c, x))
6384887Schin return 1;
6394887Schin if (islower(*nxt))
6404887Schin *nxt = toupper(*nxt);
6414887Schin else if (isupper(*nxt))
6424887Schin *nxt = tolower(*nxt);
6434887Schin else
6444887Schin return 0;
6454887Schin nxt++;
6464887Schin }
6474887Schin return collelt(ce, key, c, x);
6484887Schin }
6494887Schin
6504887Schin static int
collmatch(Rex_t * rex,unsigned char * s,unsigned char * e,unsigned char ** p)6514887Schin collmatch(Rex_t* rex, unsigned char* s, unsigned char* e, unsigned char** p)
6524887Schin {
6534887Schin unsigned char* t;
6544887Schin wchar_t c;
6554887Schin int w;
6564887Schin int r;
6574887Schin int x;
6584887Schin int ic;
6594887Schin Ckey_t key;
6604887Schin Ckey_t elt;
6614887Schin
6624887Schin ic = (rex->flags & REG_ICASE);
6634887Schin if ((w = MBSIZE(s)) > 1)
6644887Schin {
6654887Schin memcpy((char*)key, (char*)s, w);
6664887Schin key[w] = 0;
6674887Schin t = s;
6684887Schin c = mbchar(t);
6694887Schin #if !_lib_wctype
6704887Schin c &= 0xff;
6714887Schin #endif
6724887Schin x = 0;
6734887Schin }
6744887Schin else
6754887Schin {
6764887Schin c = s[0];
6774887Schin if (ic && isupper(c))
6784887Schin c = tolower(c);
6794887Schin key[0] = c;
6804887Schin key[1] = 0;
6814887Schin if (isalpha(c))
6824887Schin {
6834887Schin x = e - s;
6844887Schin if (x > COLL_KEY_MAX)
6854887Schin x = COLL_KEY_MAX;
6864887Schin while (w < x)
6874887Schin {
6884887Schin c = s[w];
6894887Schin if (!isalpha(c))
6904887Schin break;
6914887Schin r = mbxfrm(elt, key, COLL_KEY_MAX);
6924887Schin if (ic && isupper(c))
6934887Schin c = tolower(c);
6944887Schin key[w] = c;
6954887Schin key[w + 1] = 0;
6964887Schin if (mbxfrm(elt, key, COLL_KEY_MAX) != r)
6974887Schin break;
6984887Schin w++;
6994887Schin }
7004887Schin }
7014887Schin key[w] = 0;
7024887Schin c = key[0];
7034887Schin x = w - 1;
7044887Schin }
7054887Schin r = 1;
7064887Schin for (;;)
7074887Schin {
7084887Schin if (ic ? collic(rex->re.collate.elements, (char*)key, (char*)key, c, x) : collelt(rex->re.collate.elements, (char*)key, c, x))
7094887Schin break;
7104887Schin if (!x)
7114887Schin {
7124887Schin r = 0;
7134887Schin break;
7144887Schin }
7154887Schin w = x--;
7164887Schin key[w] = 0;
7174887Schin }
7184887Schin *p = s + w;
7194887Schin return rex->re.collate.invert ? !r : r;
7204887Schin }
7214887Schin
7224887Schin static unsigned char*
nestmatch(register unsigned char * s,register unsigned char * e,const unsigned short * type,register int co)7234887Schin nestmatch(register unsigned char* s, register unsigned char* e, const unsigned short* type, register int co)
7244887Schin {
7254887Schin register int c;
7264887Schin register int cc;
7274887Schin unsigned int n;
7284887Schin int oc;
7294887Schin
7304887Schin if (type[co] & (REX_NEST_literal|REX_NEST_quote))
7314887Schin {
7324887Schin n = (type[co] & REX_NEST_literal) ? REX_NEST_terminator : (REX_NEST_escape|REX_NEST_terminator);
7334887Schin while (s < e)
7344887Schin {
7354887Schin c = *s++;
7364887Schin if (c == co)
7374887Schin return s;
7384887Schin else if (type[c] & n)
7394887Schin {
7404887Schin if (s >= e || (type[c] & REX_NEST_terminator))
7414887Schin break;
7424887Schin s++;
7434887Schin }
7444887Schin }
7454887Schin }
7464887Schin else
7474887Schin {
7484887Schin cc = type[co] >> REX_NEST_SHIFT;
7494887Schin oc = type[co] & (REX_NEST_open|REX_NEST_close);
7504887Schin n = 1;
7514887Schin while (s < e)
7524887Schin {
7534887Schin c = *s++;
7544887Schin switch (type[c] & (REX_NEST_escape|REX_NEST_open|REX_NEST_close|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
7554887Schin {
7564887Schin case REX_NEST_delimiter:
7574887Schin case REX_NEST_terminator:
7584887Schin return oc ? 0 : s;
7594887Schin case REX_NEST_separator:
7604887Schin if (!oc)
7614887Schin return s;
7624887Schin break;
7634887Schin case REX_NEST_escape:
7644887Schin if (s >= e)
7654887Schin return 0;
7664887Schin s++;
7674887Schin break;
7684887Schin case REX_NEST_open|REX_NEST_close:
7694887Schin if (c == cc)
7704887Schin {
7714887Schin if (!--n)
7724887Schin return s;
7734887Schin }
7744887Schin /*FALLTHROUGH*/
7754887Schin case REX_NEST_open:
7764887Schin if (c == co)
7774887Schin {
7784887Schin if (!++n)
7794887Schin return 0;
7804887Schin }
7814887Schin else if (!(s = nestmatch(s, e, type, c)))
7824887Schin return 0;
7834887Schin break;
7844887Schin case REX_NEST_close:
7854887Schin if (c != cc)
7864887Schin return 0;
7874887Schin if (!--n)
7884887Schin return s;
7894887Schin break;
7904887Schin }
7914887Schin }
7924887Schin return (oc || !(type[UCHAR_MAX+1] & REX_NEST_terminator)) ? 0 : s;
7934887Schin }
7944887Schin return 0;
7954887Schin }
7964887Schin
7974887Schin static int
parse(Env_t * env,Rex_t * rex,Rex_t * cont,unsigned char * s)7984887Schin parse(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s)
7994887Schin {
8004887Schin int c;
8014887Schin int d;
8024887Schin int i;
8034887Schin int m;
8044887Schin int n;
8054887Schin int r;
8064887Schin int* f;
8074887Schin unsigned char* p;
8084887Schin unsigned char* t;
8094887Schin unsigned char* b;
8104887Schin unsigned char* e;
8114887Schin char* u;
8124887Schin regmatch_t* o;
8134887Schin Trie_node_t* x;
8144887Schin Rex_t* q;
8154887Schin Rex_t catcher;
8164887Schin Rex_t next;
8174887Schin
8184887Schin for (;;)
8194887Schin {
8204887Schin DEBUG_TEST(0x0008,(sfprintf(sfstdout, "AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
8214887Schin switch (rex->type)
8224887Schin {
8234887Schin case REX_ALT:
8244887Schin if (env->stack)
8254887Schin {
8264887Schin if (matchpush(env, rex))
8274887Schin return BAD;
8284887Schin if (pospush(env, rex, s, BEG_ALT))
8294887Schin return BAD;
8304887Schin catcher.type = REX_ALT_CATCH;
8314887Schin catcher.serial = rex->serial;
8324887Schin catcher.re.alt_catch.cont = cont;
8334887Schin catcher.next = rex->next;
8344887Schin r = parse(env, rex->re.group.expr.binary.left, &catcher, s);
8354887Schin if (r < BEST || (rex->flags & REG_MINIMAL))
8364887Schin {
8374887Schin matchcopy(env, rex);
8384887Schin ((Pos_t*)env->pos->vec + env->pos->cur - 1)->serial = catcher.serial = rex->re.group.expr.binary.serial;
8394887Schin n = parse(env, rex->re.group.expr.binary.right, &catcher, s);
8404887Schin if (n != NONE)
8414887Schin r = n;
8424887Schin }
8434887Schin pospop(env);
8444887Schin matchpop(env, rex);
8454887Schin }
8464887Schin else
8474887Schin {
8484887Schin if ((r = parse(env, rex->re.group.expr.binary.left, cont, s)) == NONE)
8494887Schin r = parse(env, rex->re.group.expr.binary.right, cont, s);
8504887Schin if (r == GOOD)
8514887Schin r = BEST;
8524887Schin }
8534887Schin return r;
8544887Schin case REX_ALT_CATCH:
8554887Schin if (pospush(env, rex, s, END_ANY))
8564887Schin return BAD;
8574887Schin r = follow(env, rex, rex->re.alt_catch.cont, s);
8584887Schin pospop(env);
8594887Schin return r;
8604887Schin case REX_BACK:
8614887Schin o = &env->match[rex->lo];
8624887Schin if (o->rm_so < 0)
8634887Schin return NONE;
8644887Schin i = o->rm_eo - o->rm_so;
8654887Schin e = s + i;
8664887Schin if (e > env->end)
8674887Schin return NONE;
8684887Schin t = env->beg + o->rm_so;
8694887Schin if (!(p = rex->map))
8704887Schin {
8714887Schin while (s < e)
8724887Schin if (*s++ != *t++)
8734887Schin return NONE;
8744887Schin }
8754887Schin else if (!mbwide())
8764887Schin {
8774887Schin while (s < e)
8784887Schin if (p[*s++] != p[*t++])
8794887Schin return NONE;
8804887Schin }
8814887Schin else
8824887Schin {
8834887Schin while (s < e)
8844887Schin {
8854887Schin c = mbchar(s);
8864887Schin d = mbchar(t);
8874887Schin if (towupper(c) != towupper(d))
8884887Schin return NONE;
8894887Schin }
8904887Schin }
8914887Schin break;
8924887Schin case REX_BEG:
8934887Schin if ((!(rex->flags & REG_NEWLINE) || s <= env->beg || *(s - 1) != '\n') && ((env->flags & REG_NOTBOL) || s != env->beg))
8944887Schin return NONE;
8954887Schin break;
8964887Schin case REX_CLASS:
8974887Schin if (LEADING(env, rex, s))
8984887Schin return NONE;
8994887Schin n = rex->hi;
9004887Schin if (n > env->end - s)
9014887Schin n = env->end - s;
9024887Schin m = rex->lo;
9034887Schin if (m > n)
9044887Schin return NONE;
9054887Schin r = NONE;
9064887Schin if (!(rex->flags & REG_MINIMAL))
9074887Schin {
9084887Schin for (i = 0; i < n; i++)
9094887Schin if (!settst(rex->re.charclass, s[i]))
9104887Schin {
9114887Schin n = i;
9124887Schin break;
9134887Schin }
9144887Schin for (s += n; n-- >= m; s--)
9154887Schin switch (follow(env, rex, cont, s))
9164887Schin {
9174887Schin case BAD:
9184887Schin return BAD;
9194887Schin case CUT:
9204887Schin return CUT;
9214887Schin case BEST:
9224887Schin return BEST;
9234887Schin case GOOD:
9244887Schin r = GOOD;
9254887Schin break;
9264887Schin }
9274887Schin }
9284887Schin else
9294887Schin {
9304887Schin for (e = s + m; s < e; s++)
9314887Schin if (!settst(rex->re.charclass, *s))
9324887Schin return r;
9334887Schin e += n - m;
9344887Schin for (;;)
9354887Schin {
9364887Schin switch (follow(env, rex, cont, s))
9374887Schin {
9384887Schin case BAD:
9394887Schin return BAD;
9404887Schin case CUT:
9414887Schin return CUT;
9424887Schin case BEST:
9434887Schin case GOOD:
9444887Schin return BEST;
9454887Schin }
9464887Schin if (s >= e || !settst(rex->re.charclass, *s))
9474887Schin break;
9484887Schin s++;
9494887Schin }
9504887Schin }
9514887Schin return r;
9524887Schin case REX_COLL_CLASS:
9534887Schin if (LEADING(env, rex, s))
9544887Schin return NONE;
9554887Schin n = rex->hi;
9564887Schin if (n > env->end - s)
9574887Schin n = env->end - s;
9584887Schin m = rex->lo;
9594887Schin if (m > n)
9604887Schin return NONE;
9614887Schin r = NONE;
9624887Schin e = env->end;
9634887Schin if (!(rex->flags & REG_MINIMAL))
9644887Schin {
9654887Schin if (!(b = (unsigned char*)stkpush(stkstd, n)))
9664887Schin {
9674887Schin env->error = REG_ESPACE;
9684887Schin return BAD;
9694887Schin }
9704887Schin for (i = 0; s < e && i < n && collmatch(rex, s, e, &t); i++)
9714887Schin {
9724887Schin b[i] = t - s;
9734887Schin s = t;
9744887Schin }
9754887Schin for (; i-- >= rex->lo; s -= b[i])
9764887Schin switch (follow(env, rex, cont, s))
9774887Schin {
9784887Schin case BAD:
9794887Schin stkpop(stkstd);
9804887Schin return BAD;
9814887Schin case CUT:
9824887Schin stkpop(stkstd);
9834887Schin return CUT;
9844887Schin case BEST:
9854887Schin stkpop(stkstd);
9864887Schin return BEST;
9874887Schin case GOOD:
9884887Schin r = GOOD;
9894887Schin break;
9904887Schin }
9914887Schin stkpop(stkstd);
9924887Schin }
9934887Schin else
9944887Schin {
9954887Schin for (i = 0; i < m && s < e; i++, s = t)
9964887Schin if (!collmatch(rex, s, e, &t))
9974887Schin return r;
9984887Schin while (i++ <= n)
9994887Schin {
10004887Schin switch (follow(env, rex, cont, s))
10014887Schin {
10024887Schin case BAD:
10034887Schin return BAD;
10044887Schin case CUT:
10054887Schin return CUT;
10064887Schin case BEST:
10074887Schin case GOOD:
10084887Schin return BEST;
10094887Schin }
10104887Schin if (s >= e || !collmatch(rex, s, e, &s))
10114887Schin break;
10124887Schin }
10134887Schin }
10144887Schin return r;
10154887Schin case REX_CONJ:
10164887Schin next.type = REX_CONJ_RIGHT;
10174887Schin next.re.conj_right.cont = cont;
10184887Schin next.next = rex->next;
10194887Schin catcher.type = REX_CONJ_LEFT;
10204887Schin catcher.re.conj_left.right = rex->re.group.expr.binary.right;
10214887Schin catcher.re.conj_left.cont = &next;
10224887Schin catcher.re.conj_left.beg = s;
10234887Schin catcher.next = 0;
10244887Schin return parse(env, rex->re.group.expr.binary.left, &catcher, s);
10254887Schin case REX_CONJ_LEFT:
10264887Schin rex->re.conj_left.cont->re.conj_right.end = s;
10274887Schin cont = rex->re.conj_left.cont;
10284887Schin s = rex->re.conj_left.beg;
10294887Schin rex = rex->re.conj_left.right;
10304887Schin continue;
10314887Schin case REX_CONJ_RIGHT:
10324887Schin if (rex->re.conj_right.end != s)
10334887Schin return NONE;
10344887Schin cont = rex->re.conj_right.cont;
10354887Schin break;
10364887Schin case REX_DONE:
10374887Schin if (!env->stack)
10384887Schin return BEST;
10394887Schin n = s - env->beg;
10404887Schin r = env->nsub;
10414887Schin DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
10424887Schin if ((i = env->best[0].rm_eo) >= 0)
10434887Schin {
10444887Schin if (rex->flags & REG_MINIMAL)
10454887Schin {
10464887Schin if (n > i)
10474887Schin return GOOD;
10484887Schin }
10494887Schin else
10504887Schin {
10514887Schin if (n < i)
10524887Schin return GOOD;
10534887Schin }
10544887Schin if (n == i && better(env,
10554887Schin (Pos_t*)env->bestpos->vec,
10564887Schin (Pos_t*)env->pos->vec,
10574887Schin (Pos_t*)env->bestpos->vec+env->bestpos->cur,
10584887Schin (Pos_t*)env->pos->vec+env->pos->cur,
10594887Schin 0) <= 0)
10604887Schin return GOOD;
10614887Schin }
10624887Schin env->best[0].rm_eo = n;
10634887Schin memcpy(&env->best[1], &env->match[1], r * sizeof(regmatch_t));
10644887Schin n = env->pos->cur;
10654887Schin if (!vector(Pos_t, env->bestpos, n))
10664887Schin {
10674887Schin env->error = REG_ESPACE;
10684887Schin return BAD;
10694887Schin }
10704887Schin env->bestpos->cur = n;
10714887Schin memcpy(env->bestpos->vec, env->pos->vec, n * sizeof(Pos_t));
10724887Schin DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
10734887Schin return GOOD;
10744887Schin case REX_DOT:
10754887Schin if (LEADING(env, rex, s))
10764887Schin return NONE;
10774887Schin n = rex->hi;
10784887Schin if (n > env->end - s)
10794887Schin n = env->end - s;
10804887Schin m = rex->lo;
10814887Schin if (m > n)
10824887Schin return NONE;
10834887Schin if ((c = rex->explicit) >= 0 && !mbwide())
10844887Schin for (i = 0; i < n; i++)
10854887Schin if (s[i] == c)
10864887Schin {
10874887Schin n = i;
10884887Schin break;
10894887Schin }
10904887Schin r = NONE;
10914887Schin if (!(rex->flags & REG_MINIMAL))
10924887Schin {
10934887Schin if (!mbwide())
10944887Schin {
10954887Schin for (s += n; n-- >= m; s--)
10964887Schin switch (follow(env, rex, cont, s))
10974887Schin {
10984887Schin case BAD:
10994887Schin return BAD;
11004887Schin case CUT:
11014887Schin return CUT;
11024887Schin case BEST:
11034887Schin return BEST;
11044887Schin case GOOD:
11054887Schin r = GOOD;
11064887Schin break;
11074887Schin }
11084887Schin }
11094887Schin else
11104887Schin {
11114887Schin if (!(b = (unsigned char*)stkpush(stkstd, n)))
11124887Schin {
11134887Schin env->error = REG_ESPACE;
11144887Schin return BAD;
11154887Schin }
11164887Schin e = env->end;
11174887Schin for (i = 0; s < e && i < n && *s != c; i++)
11184887Schin s += b[i] = MBSIZE(s);
11194887Schin for (; i-- >= m; s -= b[i])
11204887Schin switch (follow(env, rex, cont, s))
11214887Schin {
11224887Schin case BAD:
11234887Schin stkpop(stkstd);
11244887Schin return BAD;
11254887Schin case CUT:
11264887Schin stkpop(stkstd);
11274887Schin return CUT;
11284887Schin case BEST:
11294887Schin stkpop(stkstd);
11304887Schin return BEST;
11314887Schin case GOOD:
11324887Schin r = GOOD;
11334887Schin break;
11344887Schin }
11354887Schin stkpop(stkstd);
11364887Schin }
11374887Schin }
11384887Schin else
11394887Schin {
11404887Schin if (!mbwide())
11414887Schin {
11424887Schin e = s + n;
11434887Schin for (s += m; s <= e; s++)
11444887Schin switch (follow(env, rex, cont, s))
11454887Schin {
11464887Schin case BAD:
11474887Schin return BAD;
11484887Schin case CUT:
11494887Schin return CUT;
11504887Schin case BEST:
11514887Schin case GOOD:
11524887Schin return BEST;
11534887Schin }
11544887Schin }
11554887Schin else
11564887Schin {
11574887Schin e = env->end;
11584887Schin for (i = 0; s < e && i < m && *s != c; i++)
11594887Schin s += MBSIZE(s);
11604887Schin if (i >= m)
11614887Schin for (; s <= e && i <= n; s += MBSIZE(s), i++)
11624887Schin switch (follow(env, rex, cont, s))
11634887Schin {
11644887Schin case BAD:
11654887Schin return BAD;
11664887Schin case CUT:
11674887Schin return CUT;
11684887Schin case BEST:
11694887Schin case GOOD:
11704887Schin return BEST;
11714887Schin }
11724887Schin }
11734887Schin }
11744887Schin return r;
11754887Schin case REX_END:
11764887Schin if ((!(rex->flags & REG_NEWLINE) || *s != '\n') && ((env->flags & REG_NOTEOL) || s < env->end))
11774887Schin return NONE;
11784887Schin break;
11794887Schin case REX_GROUP:
11804887Schin DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
11814887Schin if (env->stack)
11824887Schin {
11834887Schin if (rex->re.group.number)
11844887Schin env->match[rex->re.group.number].rm_so = s - env->beg;
11854887Schin if (pospush(env, rex, s, BEG_SUB))
11864887Schin return BAD;
11874887Schin catcher.re.group_catch.eo = rex->re.group.number ? &env->match[rex->re.group.number].rm_eo : (regoff_t*)0;
11884887Schin }
11894887Schin catcher.type = REX_GROUP_CATCH;
11904887Schin catcher.serial = rex->serial;
11914887Schin catcher.re.group_catch.cont = cont;
11924887Schin catcher.next = rex->next;
11934887Schin r = parse(env, rex->re.group.expr.rex, &catcher, s);
11944887Schin if (env->stack)
11954887Schin {
11964887Schin pospop(env);
11974887Schin if (rex->re.group.number)
11984887Schin env->match[rex->re.group.number].rm_so = -1;
11994887Schin }
12004887Schin return r;
12014887Schin case REX_GROUP_CATCH:
12024887Schin DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s=>%s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rexname(rex->re.group_catch.cont), env->end - s, s)),(0));
12034887Schin if (env->stack)
12044887Schin {
12054887Schin if (rex->re.group_catch.eo)
12064887Schin *rex->re.group_catch.eo = s - env->beg;
12074887Schin if (pospush(env, rex, s, END_ANY))
12084887Schin return BAD;
12094887Schin }
12104887Schin r = follow(env, rex, rex->re.group_catch.cont, s);
12114887Schin if (env->stack)
12124887Schin {
12134887Schin pospop(env);
12144887Schin if (rex->re.group_catch.eo)
12154887Schin *rex->re.group_catch.eo = -1;
12164887Schin }
12174887Schin return r;
12184887Schin case REX_GROUP_AHEAD:
12194887Schin catcher.type = REX_GROUP_AHEAD_CATCH;
12204887Schin catcher.flags = rex->flags;
12214887Schin catcher.serial = rex->serial;
12224887Schin catcher.re.rep_catch.beg = s;
12234887Schin catcher.re.rep_catch.cont = cont;
12244887Schin catcher.next = rex->next;
12254887Schin return parse(env, rex->re.group.expr.rex, &catcher, s);
12264887Schin case REX_GROUP_AHEAD_CATCH:
12274887Schin return follow(env, rex, rex->re.rep_catch.cont, rex->re.rep_catch.beg);
12284887Schin case REX_GROUP_AHEAD_NOT:
12294887Schin r = parse(env, rex->re.group.expr.rex, NiL, s);
12304887Schin if (r == NONE)
12314887Schin r = follow(env, rex, cont, s);
12324887Schin else if (r != BAD)
12334887Schin r = NONE;
12344887Schin return r;
12354887Schin case REX_GROUP_BEHIND:
12364887Schin if ((s - env->beg) < rex->re.group.size)
12374887Schin return NONE;
12384887Schin catcher.type = REX_GROUP_BEHIND_CATCH;
12394887Schin catcher.flags = rex->flags;
12404887Schin catcher.serial = rex->serial;
12414887Schin catcher.re.behind_catch.beg = s;
12424887Schin catcher.re.behind_catch.end = e = env->end;
12434887Schin catcher.re.behind_catch.cont = cont;
12444887Schin catcher.next = rex->next;
12454887Schin for (t = s - rex->re.group.size; t >= env->beg; t--)
12464887Schin {
12474887Schin env->end = s;
12484887Schin r = parse(env, rex->re.group.expr.rex, &catcher, t);
12494887Schin env->end = e;
12504887Schin if (r != NONE)
12514887Schin return r;
12524887Schin }
12534887Schin return NONE;
12544887Schin case REX_GROUP_BEHIND_CATCH:
12554887Schin if (s != rex->re.behind_catch.beg)
12564887Schin return NONE;
12574887Schin env->end = rex->re.behind_catch.end;
12584887Schin return follow(env, rex, rex->re.behind_catch.cont, rex->re.behind_catch.beg);
12594887Schin case REX_GROUP_BEHIND_NOT:
12604887Schin if ((s - env->beg) < rex->re.group.size)
12614887Schin r = NONE;
12624887Schin else
12634887Schin {
12644887Schin catcher.type = REX_GROUP_BEHIND_NOT_CATCH;
12654887Schin catcher.re.neg_catch.beg = s;
12664887Schin catcher.next = 0;
12674887Schin e = env->end;
12684887Schin env->end = s;
12694887Schin for (t = s - rex->re.group.size; t >= env->beg; t--)
12704887Schin {
12714887Schin r = parse(env, rex->re.group.expr.rex, &catcher, t);
12724887Schin if (r != NONE)
12734887Schin break;
12744887Schin }
12754887Schin env->end = e;
12764887Schin }
12774887Schin if (r == NONE)
12784887Schin r = follow(env, rex, cont, s);
12794887Schin else if (r != BAD)
12804887Schin r = NONE;
12814887Schin return r;
12824887Schin case REX_GROUP_BEHIND_NOT_CATCH:
12834887Schin return s == rex->re.neg_catch.beg ? GOOD : NONE;
12844887Schin case REX_GROUP_COND:
12854887Schin if (q = rex->re.group.expr.binary.right)
12864887Schin {
12874887Schin catcher.re.cond_catch.next[0] = q->re.group.expr.binary.right;
12884887Schin catcher.re.cond_catch.next[1] = q->re.group.expr.binary.left;
12894887Schin }
12904887Schin else
12914887Schin catcher.re.cond_catch.next[0] = catcher.re.cond_catch.next[1] = 0;
12924887Schin if (q = rex->re.group.expr.binary.left)
12934887Schin {
12944887Schin catcher.type = REX_GROUP_COND_CATCH;
12954887Schin catcher.flags = rex->flags;
12964887Schin catcher.serial = rex->serial;
12974887Schin catcher.re.cond_catch.yes = 0;
12984887Schin catcher.re.cond_catch.beg = s;
12994887Schin catcher.re.cond_catch.cont = cont;
13004887Schin catcher.next = rex->next;
13014887Schin r = parse(env, q, &catcher, s);
13024887Schin if (r == BAD || catcher.re.cond_catch.yes)
13034887Schin return r;
13044887Schin }
13054887Schin else if (!rex->re.group.size || rex->re.group.size > 0 && env->match[rex->re.group.size].rm_so >= 0)
13064887Schin r = GOOD;
13074887Schin else
13084887Schin r = NONE;
13094887Schin if (q = catcher.re.cond_catch.next[r != NONE])
13104887Schin {
13114887Schin catcher.type = REX_CAT;
13124887Schin catcher.flags = q->flags;
13134887Schin catcher.serial = q->serial;
13144887Schin catcher.re.group_catch.cont = cont;
13154887Schin catcher.next = rex->next;
13164887Schin return parse(env, q, &catcher, s);
13174887Schin }
13184887Schin return follow(env, rex, cont, s);
13194887Schin case REX_GROUP_COND_CATCH:
13204887Schin rex->re.cond_catch.yes = 1;
13214887Schin catcher.type = REX_CAT;
13224887Schin catcher.flags = rex->flags;
13234887Schin catcher.serial = rex->serial;
13244887Schin catcher.re.group_catch.cont = rex->re.cond_catch.cont;
13254887Schin catcher.next = rex->next;
13264887Schin return parse(env, rex->re.cond_catch.next[1], &catcher, rex->re.cond_catch.beg);
13274887Schin case REX_CAT:
13284887Schin return follow(env, rex, rex->re.group_catch.cont, s);
13294887Schin case REX_GROUP_CUT:
13304887Schin catcher.type = REX_GROUP_CUT_CATCH;
13314887Schin catcher.flags = rex->flags;
13324887Schin catcher.serial = rex->serial;
13334887Schin catcher.re.group_catch.cont = cont;
13344887Schin catcher.next = rex->next;
13354887Schin return parse(env, rex->re.group.expr.rex, &catcher, s);
13364887Schin case REX_GROUP_CUT_CATCH:
13374887Schin switch (r = follow(env, rex, rex->re.group_catch.cont, s))
13384887Schin {
13394887Schin case GOOD:
13404887Schin r = BEST;
13414887Schin break;
13424887Schin case NONE:
13434887Schin r = CUT;
13444887Schin break;
13454887Schin }
13464887Schin return r;
13474887Schin case REX_KMP:
13484887Schin f = rex->re.string.fail;
13494887Schin b = rex->re.string.base;
13504887Schin n = rex->re.string.size;
13514887Schin t = s;
13524887Schin e = env->end;
13534887Schin if (p = rex->map)
13544887Schin {
13554887Schin while (t + n <= e)
13564887Schin {
13574887Schin for (i = -1; t < e; t++)
13584887Schin {
13594887Schin while (i >= 0 && b[i+1] != p[*t])
13604887Schin i = f[i];
13614887Schin if (b[i+1] == p[*t])
13624887Schin i++;
13634887Schin if (i + 1 == n)
13644887Schin {
13654887Schin t++;
13664887Schin if (env->stack)
13674887Schin env->best[0].rm_so = t - s - n;
13684887Schin switch (follow(env, rex, cont, t))
13694887Schin {
13704887Schin case BAD:
13714887Schin return BAD;
13724887Schin case CUT:
13734887Schin return CUT;
13744887Schin case BEST:
13754887Schin case GOOD:
13764887Schin return BEST;
13774887Schin }
13784887Schin t -= n - 1;
13794887Schin break;
13804887Schin }
13814887Schin }
13824887Schin }
13834887Schin }
13844887Schin else
13854887Schin {
13864887Schin while (t + n <= e)
13874887Schin {
13884887Schin for (i = -1; t < e; t++)
13894887Schin {
13904887Schin while (i >= 0 && b[i+1] != *t)
13914887Schin i = f[i];
13924887Schin if (b[i+1] == *t)
13934887Schin i++;
13944887Schin if (i + 1 == n)
13954887Schin {
13964887Schin t++;
13974887Schin if (env->stack)
13984887Schin env->best[0].rm_so = t - s - n;
13994887Schin switch (follow(env, rex, cont, t))
14004887Schin {
14014887Schin case BAD:
14024887Schin return BAD;
14034887Schin case CUT:
14044887Schin return CUT;
14054887Schin case BEST:
14064887Schin case GOOD:
14074887Schin return BEST;
14084887Schin }
14094887Schin t -= n - 1;
14104887Schin break;
14114887Schin }
14124887Schin }
14134887Schin }
14144887Schin }
14154887Schin return NONE;
14164887Schin case REX_NEG:
14174887Schin if (LEADING(env, rex, s))
14184887Schin return NONE;
14194887Schin i = env->end - s;
14204887Schin n = ((i + 7) >> 3) + 1;
14214887Schin catcher.type = REX_NEG_CATCH;
14224887Schin catcher.re.neg_catch.beg = s;
14234887Schin if (!(p = (unsigned char*)stkpush(stkstd, n)))
14244887Schin return BAD;
14254887Schin memset(catcher.re.neg_catch.index = p, 0, n);
14264887Schin catcher.next = rex->next;
14274887Schin if (parse(env, rex->re.group.expr.rex, &catcher, s) == BAD)
14284887Schin r = BAD;
14294887Schin else
14304887Schin {
14314887Schin r = NONE;
14324887Schin for (; i >= 0; i--)
14334887Schin if (!bittst(p, i))
14344887Schin {
14354887Schin switch (follow(env, rex, cont, s + i))
14364887Schin {
14374887Schin case BAD:
14384887Schin r = BAD;
14394887Schin break;
14404887Schin case BEST:
14414887Schin r = BEST;
14424887Schin break;
14434887Schin case CUT:
14444887Schin r = CUT;
14454887Schin break;
14464887Schin case GOOD:
14474887Schin r = GOOD;
14484887Schin /*FALLTHROUGH*/
14494887Schin default:
14504887Schin continue;
14514887Schin }
14524887Schin break;
14534887Schin }
14544887Schin }
14554887Schin stkpop(stkstd);
14564887Schin return r;
14574887Schin case REX_NEG_CATCH:
14584887Schin bitset(rex->re.neg_catch.index, s - rex->re.neg_catch.beg);
14594887Schin return NONE;
14604887Schin case REX_NEST:
14618462SApril.Chin@Sun.COM if (s >= env->end)
14628462SApril.Chin@Sun.COM return NONE;
14634887Schin do
14644887Schin {
14654887Schin if ((c = *s++) == rex->re.nest.primary)
14664887Schin {
14674887Schin if (s >= env->end || !(s = nestmatch(s, env->end, rex->re.nest.type, c)))
14684887Schin return NONE;
14694887Schin break;
14704887Schin }
14714887Schin if (rex->re.nest.primary >= 0)
14724887Schin return NONE;
14734887Schin if (rex->re.nest.type[c] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
14744887Schin break;
14754887Schin if (!(s = nestmatch(s, env->end, rex->re.nest.type, c)))
14764887Schin return NONE;
14774887Schin } while (s < env->end && !(rex->re.nest.type[*(s-1)] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)));
14784887Schin break;
14794887Schin case REX_NULL:
14804887Schin break;
14814887Schin case REX_ONECHAR:
14824887Schin n = rex->hi;
14834887Schin if (n > env->end - s)
14844887Schin n = env->end - s;
14854887Schin m = rex->lo;
14864887Schin if (m > n)
14874887Schin return NONE;
14884887Schin r = NONE;
14894887Schin c = rex->re.onechar;
14904887Schin if (!(rex->flags & REG_MINIMAL))
14914887Schin {
14924887Schin if (!mbwide())
14934887Schin {
14944887Schin if (p = rex->map)
14954887Schin {
14964887Schin for (i = 0; i < n; i++, s++)
14974887Schin if (p[*s] != c)
14984887Schin break;
14994887Schin }
15004887Schin else
15014887Schin {
15024887Schin for (i = 0; i < n; i++, s++)
15034887Schin if (*s != c)
15044887Schin break;
15054887Schin }
15064887Schin for (; i-- >= m; s--)
15074887Schin switch (follow(env, rex, cont, s))
15084887Schin {
15094887Schin case BAD:
15104887Schin return BAD;
15114887Schin case BEST:
15124887Schin return BEST;
15134887Schin case CUT:
15144887Schin return CUT;
15154887Schin case GOOD:
15164887Schin r = GOOD;
15174887Schin break;
15184887Schin }
15194887Schin }
15204887Schin else
15214887Schin {
15224887Schin if (!(b = (unsigned char*)stkpush(stkstd, n)))
15234887Schin {
15244887Schin env->error = REG_ESPACE;
15254887Schin return BAD;
15264887Schin }
15274887Schin e = env->end;
15284887Schin if (!(rex->flags & REG_ICASE))
15294887Schin {
15304887Schin for (i = 0; s < e && i < n; i++, s = t)
15314887Schin {
15324887Schin t = s;
15334887Schin if (mbchar(t) != c)
15344887Schin break;
15354887Schin b[i] = t - s;
15364887Schin }
15374887Schin }
15384887Schin else
15394887Schin {
15404887Schin for (i = 0; s < e && i < n; i++, s = t)
15414887Schin {
15424887Schin t = s;
15434887Schin if (towupper(mbchar(t)) != c)
15444887Schin break;
15454887Schin b[i] = t - s;
15464887Schin }
15474887Schin }
15484887Schin for (; i-- >= m; s -= b[i])
15494887Schin switch (follow(env, rex, cont, s))
15504887Schin {
15514887Schin case BAD:
15524887Schin stkpop(stkstd);
15534887Schin return BAD;
15544887Schin case BEST:
15554887Schin stkpop(stkstd);
15564887Schin return BEST;
15574887Schin case CUT:
15584887Schin stkpop(stkstd);
15594887Schin return CUT;
15604887Schin case GOOD:
15614887Schin r = GOOD;
15624887Schin break;
15634887Schin }
15644887Schin stkpop(stkstd);
15654887Schin }
15664887Schin }
15674887Schin else
15684887Schin {
15694887Schin if (!mbwide())
15704887Schin {
15714887Schin e = s + m;
15724887Schin if (p = rex->map)
15734887Schin {
15744887Schin for (; s < e; s++)
15754887Schin if (p[*s] != c)
15764887Schin return r;
15774887Schin e += n - m;
15784887Schin for (;;)
15794887Schin {
15804887Schin switch (follow(env, rex, cont, s))
15814887Schin {
15824887Schin case BAD:
15834887Schin return BAD;
15844887Schin case CUT:
15854887Schin return CUT;
15864887Schin case BEST:
15874887Schin case GOOD:
15884887Schin return BEST;
15894887Schin }
15904887Schin if (s >= e || p[*s++] != c)
15914887Schin break;
15924887Schin }
15934887Schin }
15944887Schin else
15954887Schin {
15964887Schin for (; s < e; s++)
15974887Schin if (*s != c)
15984887Schin return r;
15994887Schin e += n - m;
16004887Schin for (;;)
16014887Schin {
16024887Schin switch (follow(env, rex, cont, s))
16034887Schin {
16044887Schin case BAD:
16054887Schin return BAD;
16064887Schin case CUT:
16074887Schin return CUT;
16084887Schin case BEST:
16094887Schin case GOOD:
16104887Schin return BEST;
16114887Schin }
16124887Schin if (s >= e || *s++ != c)
16134887Schin break;
16144887Schin }
16154887Schin }
16164887Schin }
16174887Schin else
16184887Schin {
16194887Schin if (!(rex->flags & REG_ICASE))
16204887Schin {
16214887Schin for (i = 0; i < m && s < e; i++, s = t)
16224887Schin {
16234887Schin t = s;
16244887Schin if (mbchar(t) != c)
16254887Schin return r;
16264887Schin }
16274887Schin while (i++ <= n)
16284887Schin {
16294887Schin switch (follow(env, rex, cont, s))
16304887Schin {
16314887Schin case BAD:
16324887Schin return BAD;
16334887Schin case CUT:
16344887Schin return CUT;
16354887Schin case BEST:
16364887Schin case GOOD:
16374887Schin return BEST;
16384887Schin }
16394887Schin if (s >= e || mbchar(s) != c)
16404887Schin break;
16414887Schin }
16424887Schin }
16434887Schin else
16444887Schin {
16454887Schin for (i = 0; i < m && s < e; i++, s = t)
16464887Schin {
16474887Schin t = s;
16484887Schin if (towupper(mbchar(t)) != c)
16494887Schin return r;
16504887Schin }
16514887Schin while (i++ <= n)
16524887Schin {
16534887Schin switch (follow(env, rex, cont, s))
16544887Schin {
16554887Schin case BAD:
16564887Schin return BAD;
16574887Schin case CUT:
16584887Schin return CUT;
16594887Schin case BEST:
16604887Schin case GOOD:
16614887Schin return BEST;
16624887Schin }
16634887Schin if (s >= e || towupper(mbchar(s)) != c)
16644887Schin break;
16654887Schin }
16664887Schin }
16674887Schin }
16684887Schin }
16694887Schin return r;
16704887Schin case REX_REP:
16714887Schin if (env->stack && pospush(env, rex, s, BEG_REP))
16724887Schin return BAD;
16734887Schin r = parserep(env, rex, cont, s, 0);
16744887Schin if (env->stack)
16754887Schin pospop(env);
16764887Schin return r;
16774887Schin case REX_REP_CATCH:
16784887Schin DEBUG_TEST(0x0020,(sfprintf(sfstdout, "AHA#%04d 0x%04x %s n %d len %d s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.rep_catch.n, s - rex->re.rep_catch.beg, env->end - s, s)),(0));
16794887Schin if (env->stack && pospush(env, rex, s, END_ANY))
16804887Schin return BAD;
16814887Schin if (s == rex->re.rep_catch.beg && rex->re.rep_catch.n > rex->re.rep_catch.ref->lo)
16824887Schin {
16834887Schin /*
16844887Schin * optional empty iteration
16854887Schin */
16864887Schin
16874887Schin DEBUG_TEST(0x0002,(sfprintf(sfstdout, "AHA#%04d %p re.group.back=%d re.group.expr.rex=%s\n", __LINE__, rex->re.rep_catch.ref->re.group.expr.rex, rex->re.rep_catch.ref->re.group.expr.rex->re.group.back, rexname(rex->re.rep_catch.ref->re.group.expr.rex))),(0));
16884887Schin if (!env->stack || s != rex->re.rep_catch.ref->re.rep_catch.beg && !rex->re.rep_catch.ref->re.group.expr.rex->re.group.back)
16894887Schin r = NONE;
16904887Schin else if (pospush(env, rex, s, END_ANY))
16914887Schin r = BAD;
16924887Schin else
16934887Schin {
16944887Schin r = follow(env, rex, rex->re.rep_catch.cont, s);
16954887Schin pospop(env);
16964887Schin }
16974887Schin }
16984887Schin else
16994887Schin r = parserep(env, rex->re.rep_catch.ref, rex->re.rep_catch.cont, s, rex->re.rep_catch.n);
17004887Schin if (env->stack)
17014887Schin pospop(env);
17024887Schin return r;
17034887Schin case REX_STRING:
17044887Schin DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s \"%-.*s\" `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.string.size, rex->re.string.base, env->end - s, s)),(0));
17054887Schin if (rex->re.string.size > (env->end - s))
17064887Schin return NONE;
17074887Schin t = rex->re.string.base;
17084887Schin e = t + rex->re.string.size;
17094887Schin if (!(p = rex->map))
17104887Schin {
17114887Schin while (t < e)
17124887Schin if (*s++ != *t++)
17134887Schin return NONE;
17144887Schin }
17154887Schin else if (!mbwide())
17164887Schin {
17174887Schin while (t < e)
17184887Schin if (p[*s++] != *t++)
17194887Schin return NONE;
17204887Schin }
17214887Schin else
17224887Schin {
17234887Schin while (t < e)
17244887Schin {
17254887Schin c = mbchar(s);
17264887Schin d = mbchar(t);
17274887Schin if (towupper(c) != d)
17284887Schin return NONE;
17294887Schin }
17304887Schin }
17314887Schin break;
17324887Schin case REX_TRIE:
17334887Schin if (((s + rex->re.trie.min) > env->end) || !(x = rex->re.trie.root[rex->map ? rex->map[*s] : *s]))
17344887Schin return NONE;
17354887Schin return parsetrie(env, x, rex, cont, s);
17364887Schin case REX_EXEC:
17374887Schin u = 0;
17384887Schin r = (*env->disc->re_execf)(env->regex, rex->re.exec.data, rex->re.exec.text, rex->re.exec.size, (const char*)s, env->end - s, &u, env->disc);
17394887Schin e = (unsigned char*)u;
17404887Schin if (e >= s && e <= env->end)
17414887Schin s = e;
17424887Schin switch (r)
17434887Schin {
17444887Schin case 0:
17454887Schin break;
17464887Schin case REG_NOMATCH:
17474887Schin return NONE;
17484887Schin default:
17494887Schin env->error = r;
17504887Schin return BAD;
17514887Schin }
17524887Schin break;
17534887Schin case REX_WBEG:
17544887Schin if (!isword(*s) || s > env->beg && isword(*(s - 1)))
17554887Schin return NONE;
17564887Schin break;
17574887Schin case REX_WEND:
17584887Schin if (isword(*s) || s > env->beg && !isword(*(s - 1)))
17594887Schin return NONE;
17604887Schin break;
17614887Schin case REX_WORD:
17624887Schin if (s > env->beg && isword(*(s - 1)) == isword(*s))
17634887Schin return NONE;
17644887Schin break;
17654887Schin case REX_WORD_NOT:
17664887Schin if (s == env->beg || isword(*(s - 1)) != isword(*s))
17674887Schin return NONE;
17684887Schin break;
17694887Schin case REX_BEG_STR:
17704887Schin if (s != env->beg)
17714887Schin return NONE;
17724887Schin break;
17734887Schin case REX_END_STR:
17744887Schin for (t = s; t < env->end && *t == '\n'; t++);
17754887Schin if (t < env->end)
17764887Schin return NONE;
17774887Schin break;
17784887Schin case REX_FIN_STR:
17794887Schin if (s < env->end)
17804887Schin return NONE;
17814887Schin break;
17824887Schin }
17834887Schin if (!(rex = rex->next))
17844887Schin {
17854887Schin if (!(rex = cont))
17864887Schin break;
17874887Schin cont = 0;
17884887Schin }
17894887Schin }
17904887Schin return GOOD;
17914887Schin }
17924887Schin
17934887Schin #if _AST_REGEX_DEBUG
17944887Schin
17954887Schin static void
listnode(Rex_t * e,int level)17964887Schin listnode(Rex_t* e, int level)
17974887Schin {
17984887Schin int i;
17994887Schin
18004887Schin if (e)
18014887Schin {
18024887Schin do
18034887Schin {
18044887Schin for (i = 0; i < level; i++)
18054887Schin sfprintf(sfstderr, " ");
18064887Schin sfprintf(sfstderr, "%s\n", rexname(e));
18074887Schin switch (e->type)
18084887Schin {
18094887Schin case REX_ALT:
18104887Schin case REX_CONJ:
18114887Schin listnode(e->re.group.expr.binary.left, level + 1);
18124887Schin listnode(e->re.group.expr.binary.right, level + 1);
18134887Schin break;
18144887Schin case REX_GROUP:
18154887Schin case REX_GROUP_AHEAD:
18164887Schin case REX_GROUP_AHEAD_NOT:
18174887Schin case REX_GROUP_BEHIND:
18184887Schin case REX_GROUP_BEHIND_NOT:
18194887Schin case REX_GROUP_CUT:
18204887Schin case REX_NEG:
18214887Schin case REX_REP:
18224887Schin listnode(e->re.group.expr.rex, level + 1);
18234887Schin break;
18244887Schin }
18254887Schin } while (e = e->next);
18264887Schin }
18274887Schin }
18284887Schin
18294887Schin static int
list(Env_t * env,Rex_t * rex)18304887Schin list(Env_t* env, Rex_t* rex)
18314887Schin {
18324887Schin sfprintf(sfstderr, "AHA regex hard=%d stack=%p\n", env->hard, env->stack);
18334887Schin if (rex)
18344887Schin listnode(rex, 1);
18354887Schin return 0;
18364887Schin }
18374887Schin
18384887Schin #endif
18394887Schin
18404887Schin /*
18414887Schin * returning REG_BADPAT or REG_ESPACE is not explicitly
18424887Schin * countenanced by the standard
18434887Schin */
18444887Schin
18454887Schin int
regnexec(const regex_t * p,const char * s,size_t len,size_t nmatch,regmatch_t * match,regflags_t flags)18464887Schin regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags)
18474887Schin {
18484887Schin register int n;
18494887Schin register int i;
18504887Schin int j;
18514887Schin int k;
18524887Schin int m;
18534887Schin int advance;
18544887Schin Env_t* env;
18554887Schin Rex_t* e;
18564887Schin
18574887Schin DEBUG_INIT();
18584887Schin DEBUG_TEST(0x0001,(sfprintf(sfstdout, "AHA#%04d 0x%04x regnexec %d 0x%08x `%-.*s'\n", __LINE__, debug_flag, nmatch, flags, len, s)),(0));
18594887Schin if (!p || !(env = p->env))
18604887Schin return REG_BADPAT;
18614887Schin if (!s)
18624887Schin return fatal(env->disc, REG_BADPAT, NiL);
18634887Schin if (len < env->min)
18644887Schin {
18654887Schin DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, env->min)),(0));
18664887Schin return REG_NOMATCH;
18674887Schin }
18684887Schin env->regex = p;
18694887Schin env->beg = (unsigned char*)s;
18704887Schin env->end = env->beg + len;
18714887Schin stknew(stkstd, &env->stk);
18724887Schin env->flags &= ~REG_EXEC;
18734887Schin env->flags |= (flags & REG_EXEC);
18744887Schin advance = 0;
18754887Schin if (env->stack = env->hard || !(env->flags & REG_NOSUB) && nmatch)
18764887Schin {
18774887Schin n = env->nsub;
18784887Schin if (!(env->match = (regmatch_t*)stkpush(stkstd, 2 * (n + 1) * sizeof(regmatch_t))) ||
18794887Schin !env->pos && !(env->pos = vecopen(16, sizeof(Pos_t))) ||
18804887Schin !env->bestpos && !(env->bestpos = vecopen(16, sizeof(Pos_t))))
18814887Schin {
18824887Schin k = REG_ESPACE;
18834887Schin goto done;
18844887Schin }
18854887Schin env->pos->cur = env->bestpos->cur = 0;
18864887Schin env->best = &env->match[n + 1];
18874887Schin env->best[0].rm_so = 0;
18884887Schin env->best[0].rm_eo = -1;
18894887Schin for (i = 0; i <= n; i++)
18904887Schin env->match[i] = state.nomatch;
18914887Schin if (flags & REG_ADVANCE)
18924887Schin advance = 1;
18934887Schin }
18944887Schin DEBUG_TEST(0x1000,(list(env,env->rex)),(0));
18954887Schin k = REG_NOMATCH;
18964887Schin if ((e = env->rex)->type == REX_BM)
18974887Schin {
18984887Schin DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM\n", __LINE__)),(0));
18994887Schin if (len < e->re.bm.right)
19004887Schin {
19014887Schin DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, e->re.bm.right)),(0));
19024887Schin goto done;
19034887Schin }
19044887Schin else if (!(flags & REG_LEFT))
19054887Schin {
19064887Schin register unsigned char* buf = (unsigned char*)s;
19074887Schin register size_t index = e->re.bm.left + e->re.bm.size;
19084887Schin register size_t mid = len - e->re.bm.right;
19094887Schin register size_t* skip = e->re.bm.skip;
19104887Schin register size_t* fail = e->re.bm.fail;
19114887Schin register Bm_mask_t** mask = e->re.bm.mask;
19124887Schin Bm_mask_t m;
19134887Schin size_t x;
19144887Schin
19154887Schin DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM len=%d right=%d left=%d size=%d %d %d\n", __LINE__, len, e->re.bm.right, e->re.bm.left, e->re.bm.size, index, mid)),(0));
19164887Schin for (;;)
19174887Schin {
19184887Schin while ((index += skip[buf[index]]) < mid);
19194887Schin if (index < HIT)
19204887Schin {
19214887Schin DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, index, HIT)),(0));
19224887Schin goto done;
19234887Schin }
19244887Schin index -= HIT;
19254887Schin m = mask[n = e->re.bm.size - 1][buf[index]];
19264887Schin do
19274887Schin {
19284887Schin if (!n--)
19294887Schin {
19304887Schin if (e->re.bm.back < 0)
19314887Schin goto possible;
19324887Schin if (advance)
19334887Schin {
19344887Schin i = index - e->re.bm.back;
19354887Schin s += i;
19364887Schin if (env->stack)
19374887Schin env->best[0].rm_so += i;
19384887Schin goto possible;
19394887Schin }
19404887Schin x = index;
19414887Schin if (index < e->re.bm.back)
19424887Schin index = 0;
19434887Schin else
19444887Schin index -= e->re.bm.back;
19454887Schin while (index <= x)
19464887Schin {
19474887Schin if ((i = parse(env, e->next, &env->done, buf + index)) != NONE)
19484887Schin {
19494887Schin if (env->stack)
19504887Schin env->best[0].rm_so = index;
19514887Schin n = env->nsub;
19524887Schin goto hit;
19534887Schin }
19544887Schin index++;
19554887Schin }
19564887Schin index += e->re.bm.size;
19574887Schin break;
19584887Schin }
19594887Schin } while (m &= mask[n][buf[--index]]);
19604887Schin if ((index += fail[n + 1]) >= len)
19614887Schin goto done;
19624887Schin }
19634887Schin }
19644887Schin possible:
19654887Schin n = env->nsub;
19664887Schin e = e->next;
19674887Schin }
19684887Schin j = env->once || (flags & REG_LEFT);
19694887Schin DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d parse once=%d\n", __LINE__, j)),(0));
19704887Schin while ((i = parse(env, e, &env->done, (unsigned char*)s)) == NONE || advance && !env->best[0].rm_eo && !(advance = 0))
19714887Schin {
19724887Schin if (j)
19734887Schin goto done;
19744887Schin i = MBSIZE(s);
19754887Schin s += i;
19764887Schin if ((unsigned char*)s > env->end - env->min)
19774887Schin goto done;
19784887Schin if (env->stack)
19794887Schin env->best[0].rm_so += i;
19804887Schin }
19814887Schin if ((flags & REG_LEFT) && env->stack && env->best[0].rm_so)
19824887Schin goto done;
19834887Schin hit:
19844887Schin if (k = env->error)
19854887Schin goto done;
19864887Schin if (i == CUT)
19874887Schin {
19884887Schin k = env->error = REG_NOMATCH;
19894887Schin goto done;
19904887Schin }
19914887Schin if (!(env->flags & REG_NOSUB))
19924887Schin {
19934887Schin k = (env->flags & (REG_SHELL|REG_AUGMENTED)) == (REG_SHELL|REG_AUGMENTED);
19944887Schin for (i = j = m = 0; j < nmatch; i++)
19954887Schin if (!i || !k || (i & 1))
19964887Schin {
19974887Schin if (i > n)
19984887Schin match[j] = state.nomatch;
19994887Schin else
20004887Schin match[m = j] = env->best[i];
20014887Schin j++;
20024887Schin }
20034887Schin if (k)
20044887Schin {
20054887Schin while (m > 0 && match[m].rm_so == -1 && match[m].rm_eo == -1)
20064887Schin m--;
20074887Schin ((regex_t*)p)->re_nsub = m;
20084887Schin }
20094887Schin }
20104887Schin k = 0;
20114887Schin done:
20124887Schin stkold(stkstd, &env->stk);
20134887Schin env->stk.base = 0;
20144887Schin if (k > REG_NOMATCH)
20154887Schin fatal(p->env->disc, k, NiL);
20164887Schin return k;
20174887Schin }
20184887Schin
20194887Schin void
regfree(regex_t * p)20204887Schin regfree(regex_t* p)
20214887Schin {
20224887Schin Env_t* env;
20234887Schin
20244887Schin if (p && (env = p->env))
20254887Schin {
20264887Schin #if _REG_subcomp
20274887Schin if (env->sub)
20284887Schin {
20294887Schin regsubfree(p);
20304887Schin p->re_sub = 0;
20314887Schin }
20324887Schin #endif
20334887Schin p->env = 0;
20344887Schin if (--env->refs <= 0 && !(env->disc->re_flags & REG_NOFREE))
20354887Schin {
20364887Schin drop(env->disc, env->rex);
20374887Schin if (env->pos)
20384887Schin vecclose(env->pos);
20394887Schin if (env->bestpos)
20404887Schin vecclose(env->bestpos);
20414887Schin if (env->stk.base)
20424887Schin stkold(stkstd, &env->stk);
20434887Schin alloc(env->disc, env, 0);
20444887Schin }
20454887Schin }
20464887Schin }
2047