14887Schin /***********************************************************************
24887Schin * *
34887Schin * This software is part of the ast package *
4*12068SRoger.Faulkner@Oracle.COM * Copyright (c) 1985-2010 AT&T Intellectual Property *
54887Schin * and is licensed under the *
64887Schin * Common Public License, Version 1.0 *
78462SApril.Chin@Sun.COM * by AT&T Intellectual Property *
84887Schin * *
94887Schin * A copy of the License is available at *
104887Schin * http://www.opensource.org/licenses/cpl1.0.txt *
114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
124887Schin * *
134887Schin * Information and Software Systems Research *
144887Schin * AT&T Research *
154887Schin * Florham Park NJ *
164887Schin * *
174887Schin * Glenn Fowler <gsf@research.att.com> *
184887Schin * David Korn <dgk@research.att.com> *
194887Schin * Phong Vo <kpv@research.att.com> *
204887Schin * *
214887Schin ***********************************************************************/
224887Schin #pragma prototyped
234887Schin
244887Schin /*
254887Schin * posix regex decompiler
264887Schin */
274887Schin
284887Schin #include "reglib.h"
294887Schin
304887Schin #undef ismeta
314887Schin #define ismeta(c,t,e,d) (state.magic[c] && state.magic[c][(t)+(e)] >= T_META || (c) == (d))
324887Schin #define meta(f,c,t,e,d) do { if (ismeta(c,t,e,d)) sfputc(f, '\\'); sfputc(f, c); } while (0)
334887Schin
344887Schin static void
detrie(Trie_node_t * x,Sfio_t * sp,char * b,char * p,char * e,int delimiter)354887Schin detrie(Trie_node_t* x, Sfio_t* sp, char* b, char* p, char* e, int delimiter)
364887Schin {
374887Schin register Trie_node_t* y;
384887Schin char* o;
394887Schin int k;
404887Schin
414887Schin o = p;
424887Schin k = 1;
434887Schin do
444887Schin {
454887Schin if (k)
464887Schin {
474887Schin o = p;
484887Schin if (p < e)
494887Schin *p++ = x->c;
504887Schin }
514887Schin sfputc(sp, x->c);
524887Schin for (y = x->sib; y; y = y->sib)
534887Schin {
544887Schin sfputc(sp, '|');
554887Schin sfputc(sp, '<');
564887Schin sfwrite(sp, b, p - b);
574887Schin sfputc(sp, '>');
584887Schin detrie(y, sp, b, p, e, delimiter);
594887Schin }
604887Schin if (x->end && x->son)
614887Schin {
624887Schin sfputc(sp, '|');
634887Schin sfputc(sp, '{');
644887Schin sfwrite(sp, b, p - b);
654887Schin sfputc(sp, '}');
664887Schin p = o;
674887Schin }
684887Schin } while (x = x->son);
694887Schin }
704887Schin
714887Schin static int
decomp(register Rex_t * e,Sfio_t * sp,int type,int delimiter,regflags_t flags)724887Schin decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags)
734887Schin {
744887Schin Rex_t* q;
754887Schin unsigned char* s;
764887Schin unsigned char* t;
774887Schin int c;
78*12068SRoger.Faulkner@Oracle.COM int m;
794887Schin int cb;
804887Schin int cd;
814887Schin int cr;
824887Schin int ib;
834887Schin int ie;
844887Schin int nb;
854887Schin int ne;
864887Schin unsigned char ic[2*UCHAR_MAX];
874887Schin unsigned char nc[2*UCHAR_MAX];
884887Schin
894887Schin do
904887Schin {
914887Schin switch (e->type)
924887Schin {
934887Schin case REX_ALT:
944887Schin if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
954887Schin return 1;
964887Schin sfputc(sp, '|');
974887Schin if (e->re.group.expr.binary.right && decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
984887Schin return 1;
994887Schin break;
1004887Schin case REX_BACK:
1014887Schin sfprintf(sp, "\\%d", e->lo);
1024887Schin break;
1034887Schin case REX_BEG:
1044887Schin if (type < SRE)
1054887Schin sfputc(sp, '^');
1064887Schin break;
1074887Schin case REX_END:
1084887Schin if (type < SRE)
1094887Schin sfputc(sp, '$');
1104887Schin break;
1114887Schin case REX_WBEG:
1124887Schin meta(sp, '<', type, 1, delimiter);
1134887Schin break;
1144887Schin case REX_WEND:
1154887Schin meta(sp, '<', type, 1, delimiter);
1164887Schin break;
1174887Schin case REX_WORD:
1184887Schin sfprintf(sp, "\\w");
1194887Schin break;
1204887Schin case REX_CLASS:
1214887Schin case REX_COLL_CLASS:
1224887Schin case REX_ONECHAR:
1234887Schin case REX_DOT:
1244887Schin case REX_REP:
1254887Schin if (type >= SRE)
1264887Schin {
1274887Schin c = ')';
1284887Schin if (e->hi == RE_DUP_INF)
1294887Schin {
1304887Schin if (!e->lo)
1314887Schin sfputc(sp, '*');
1324887Schin else if (e->lo == 1)
1334887Schin sfputc(sp, '+');
1344887Schin else
1354887Schin sfprintf(sp, "{%d,}", e->lo);
1364887Schin }
1374887Schin else if (e->hi != 1)
1384887Schin sfprintf(sp, "{%d,%d}", e->lo, e->hi);
1394887Schin else if (e->lo == 0)
1404887Schin sfputc(sp, '?');
1414887Schin else
1424887Schin c = 0;
1434887Schin }
1444887Schin switch (e->type)
1454887Schin {
1464887Schin case REX_REP:
1474887Schin if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
1484887Schin return 1;
1494887Schin break;
1504887Schin case REX_CLASS:
1514887Schin sfputc(sp, '[');
1524887Schin nb = ne = ib = ie = -2;
1534887Schin cb = cd = cr = 0;
1544887Schin s = nc;
1554887Schin t = ic;
156*12068SRoger.Faulkner@Oracle.COM for (m = 0; m <= UCHAR_MAX; m++)
157*12068SRoger.Faulkner@Oracle.COM if (settst(e->re.charclass, m))
1584887Schin {
159*12068SRoger.Faulkner@Oracle.COM if (m == ']')
1604887Schin cb = 1;
161*12068SRoger.Faulkner@Oracle.COM else if (m == '-')
1624887Schin cr = 1;
163*12068SRoger.Faulkner@Oracle.COM else if (m == delimiter)
1644887Schin cd = 1;
1654887Schin else if (nb < 0)
166*12068SRoger.Faulkner@Oracle.COM ne = nb = m;
167*12068SRoger.Faulkner@Oracle.COM else if (ne == (m - 1))
168*12068SRoger.Faulkner@Oracle.COM ne = m;
1694887Schin else
1704887Schin {
1714887Schin if (ne == nb)
1724887Schin *s++ = ne;
1734887Schin else
1744887Schin {
1754887Schin *s++ = nb;
1764887Schin *s++ = '-';
1774887Schin *s++ = ne;
1784887Schin }
179*12068SRoger.Faulkner@Oracle.COM ne = nb = m;
1804887Schin }
1814887Schin }
1824887Schin else
1834887Schin {
184*12068SRoger.Faulkner@Oracle.COM if (m == ']')
1854887Schin cb = -1;
186*12068SRoger.Faulkner@Oracle.COM else if (m == '-')
1874887Schin cr = -1;
188*12068SRoger.Faulkner@Oracle.COM else if (m == delimiter)
1894887Schin cd = -1;
1904887Schin else if (ib < 0)
191*12068SRoger.Faulkner@Oracle.COM ie = ib = m;
192*12068SRoger.Faulkner@Oracle.COM else if (ie == (m - 1))
193*12068SRoger.Faulkner@Oracle.COM ie = m;
1944887Schin else
1954887Schin {
1964887Schin if (ie == ib)
1974887Schin *t++ = ie;
1984887Schin else
1994887Schin {
2004887Schin *t++ = ib;
2014887Schin *t++ = '-';
2024887Schin *t++ = ie;
2034887Schin }
204*12068SRoger.Faulkner@Oracle.COM ie = ib = m;
2054887Schin }
2064887Schin }
2074887Schin if (nb >= 0)
2084887Schin {
2094887Schin *s++ = nb;
2104887Schin if (ne != nb)
2114887Schin {
2124887Schin *s++ = '-';
2134887Schin *s++ = ne;
2144887Schin }
2154887Schin }
2164887Schin if (ib >= 0)
2174887Schin {
2184887Schin *t++ = ib;
2194887Schin if (ie != ib)
2204887Schin {
2214887Schin *t++ = '-';
2224887Schin *t++ = ie;
2234887Schin }
2244887Schin }
2254887Schin if ((t - ic + 1) < (s - nc + (nc[0] == '^')))
2264887Schin {
2274887Schin sfputc(sp, '^');
2284887Schin if (cb < 0)
2294887Schin sfputc(sp, ']');
2304887Schin if (cr < 0)
2314887Schin sfputc(sp, '-');
232*12068SRoger.Faulkner@Oracle.COM if (cd < 0 && delimiter > 0)
2334887Schin {
2344887Schin if (flags & REG_ESCAPE)
2354887Schin sfputc(sp, '\\');
2364887Schin sfputc(sp, delimiter);
2374887Schin }
2384887Schin sfwrite(sp, ic, t - ic);
2394887Schin }
2404887Schin else
2414887Schin {
2424887Schin if (cb > 0)
2434887Schin sfputc(sp, ']');
2444887Schin if (cr > 0)
2454887Schin sfputc(sp, '-');
246*12068SRoger.Faulkner@Oracle.COM if (cd > 0 && delimiter > 0)
2474887Schin {
2484887Schin if (flags & REG_ESCAPE)
2494887Schin sfputc(sp, '\\');
2504887Schin sfputc(sp, delimiter);
2514887Schin }
2524887Schin if (nc[0] == '^')
2534887Schin {
2544887Schin sfwrite(sp, nc + 1, s - nc - 1);
2554887Schin sfputc(sp, '^');
2564887Schin }
2574887Schin else
2584887Schin sfwrite(sp, nc, s - nc);
2594887Schin }
2604887Schin sfputc(sp, ']');
2614887Schin break;
2624887Schin case REX_COLL_CLASS:
2634887Schin break;
2644887Schin case REX_ONECHAR:
2654887Schin meta(sp, e->re.onechar, type, 0, delimiter);
2664887Schin break;
2674887Schin case REX_DOT:
2684887Schin sfputc(sp, '.');
2694887Schin break;
2704887Schin }
2714887Schin if (type < SRE)
2724887Schin {
2734887Schin if (e->hi == RE_DUP_INF)
2744887Schin {
2754887Schin if (!e->lo)
2764887Schin sfputc(sp, '*');
2774887Schin else if (e->lo == 1 && ismeta('+', type, 0, delimiter))
2784887Schin meta(sp, '+', type, 1, delimiter);
2794887Schin else
2804887Schin {
2814887Schin meta(sp, '{', type, 1, delimiter);
2824887Schin sfprintf(sp, "%d,", e->lo);
2834887Schin meta(sp, '}', type, 1, delimiter);
2844887Schin }
2854887Schin }
2864887Schin else if (e->hi != 1 || e->lo == 0 && !ismeta('?', type, 0, delimiter))
2874887Schin {
2884887Schin meta(sp, '{', type, 1, delimiter);
2894887Schin sfprintf(sp, "%d,%d", e->lo, e->hi);
2904887Schin meta(sp, '}', type, 1, delimiter);
2914887Schin }
2924887Schin else if (e->lo == 0)
2934887Schin meta(sp, '?', type, 1, delimiter);
2944887Schin }
2954887Schin else if (c)
2964887Schin sfputc(sp, c);
2974887Schin break;
2984887Schin case REX_STRING:
2994887Schin case REX_KMP:
3004887Schin t = (s = e->re.string.base) + e->re.string.size;
3014887Schin while (s < t)
3024887Schin {
3034887Schin c = *s++;
3044887Schin meta(sp, c, type, 0, delimiter);
3054887Schin }
3064887Schin break;
3074887Schin case REX_TRIE:
3084887Schin ib = 0;
3094887Schin for (c = 0; c <= UCHAR_MAX; c++)
3104887Schin if (e->re.trie.root[c])
3114887Schin {
3124887Schin char pfx[1024];
3134887Schin
3144887Schin if (ib)
3154887Schin sfputc(sp, '|');
3164887Schin else
3174887Schin ib = 1;
3184887Schin detrie(e->re.trie.root[c], sp, pfx, pfx, &pfx[sizeof(pfx)], delimiter);
3194887Schin }
3204887Schin break;
3214887Schin case REX_NEG:
3224887Schin if (type >= SRE)
3234887Schin sfprintf(sp, "!(");
3244887Schin if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
3254887Schin return 1;
3264887Schin if (type >= SRE)
3274887Schin sfputc(sp, ')');
3284887Schin else
3294887Schin sfputc(sp, '!');
3304887Schin break;
3314887Schin case REX_CONJ:
3324887Schin if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
3334887Schin return 1;
3344887Schin sfputc(sp, '&');
3354887Schin if (decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
3364887Schin return 1;
3374887Schin break;
3384887Schin case REX_GROUP:
3394887Schin if (type >= SRE)
3404887Schin sfputc(sp, '@');
3414887Schin meta(sp, '(', type, 1, delimiter);
3424887Schin if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
3434887Schin return 1;
3444887Schin meta(sp, ')', type, 1, delimiter);
3454887Schin break;
3464887Schin case REX_GROUP_AHEAD:
3474887Schin case REX_GROUP_AHEAD_NOT:
3484887Schin case REX_GROUP_BEHIND:
3494887Schin case REX_GROUP_BEHIND_NOT:
3504887Schin meta(sp, '(', type, 1, delimiter);
3514887Schin sfputc(sp, '?');
3524887Schin if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
3534887Schin return 1;
3544887Schin meta(sp, ')', type, 1, delimiter);
3554887Schin break;
3564887Schin case REX_GROUP_COND:
3574887Schin meta(sp, '(', type, 1, delimiter);
3584887Schin sfputc(sp, '?');
3594887Schin if (e->re.group.expr.binary.left && decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
3604887Schin return 1;
3614887Schin if (q = e->re.group.expr.binary.right)
3624887Schin {
3634887Schin sfputc(sp, ':');
3644887Schin if (q->re.group.expr.binary.left && decomp(q->re.group.expr.binary.left, sp, type, delimiter, flags))
3654887Schin return 1;
3664887Schin sfputc(sp, ':');
3674887Schin if (q->re.group.expr.binary.right && decomp(q->re.group.expr.binary.right, sp, type, delimiter, flags))
3684887Schin return 1;
3694887Schin }
3704887Schin meta(sp, ')', type, 1, delimiter);
3714887Schin break;
3724887Schin case REX_GROUP_CUT:
3734887Schin meta(sp, '(', type, 1, delimiter);
3744887Schin sfputc(sp, '?');
3754887Schin if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
3764887Schin return 1;
3774887Schin meta(sp, ')', type, 1, delimiter);
3784887Schin break;
3794887Schin case REX_BM:
3804887Schin break;
3814887Schin default:
3824887Schin sfprintf(sp, "<ERROR:REX_%d>", e->type);
3834887Schin break;
3844887Schin }
3854887Schin } while (e = e->next);
3864887Schin return 0;
3874887Schin }
3884887Schin
3894887Schin /*
3904887Schin * reconstruct pattern from compiled re p into sp
3914887Schin */
3924887Schin
3934887Schin size_t
regdecomp(regex_t * p,regflags_t flags,char * buf,size_t n)3944887Schin regdecomp(regex_t* p, regflags_t flags, char* buf, size_t n)
3954887Schin {
3964887Schin Sfio_t* sp;
3974887Schin char* s;
3984887Schin int type;
3994887Schin int delimiter;
4004887Schin size_t r;
4014887Schin
4024887Schin if (!(sp = sfstropen()))
4034887Schin return 0;
404*12068SRoger.Faulkner@Oracle.COM if (flags == (regflags_t)~0)
4054887Schin flags = p->env->flags;
4064887Schin switch (flags & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL))
4074887Schin {
4084887Schin case 0:
4094887Schin type = BRE;
4104887Schin break;
4114887Schin case REG_AUGMENTED:
4124887Schin case REG_AUGMENTED|REG_EXTENDED:
4134887Schin type = ARE;
4144887Schin break;
4154887Schin case REG_EXTENDED:
4164887Schin type = ERE;
4174887Schin break;
4184887Schin case REG_SHELL:
4194887Schin type = SRE;
4204887Schin break;
4214887Schin default:
4224887Schin type = KRE;
4234887Schin break;
4244887Schin }
4254887Schin if (flags & REG_DELIMITED)
4264887Schin {
4274887Schin delimiter = '/';
4284887Schin sfputc(sp, delimiter);
4294887Schin }
4304887Schin else
431*12068SRoger.Faulkner@Oracle.COM delimiter = -1;
4324887Schin if (decomp(p->env->rex, sp, type, delimiter, flags))
4334887Schin r = 0;
4344887Schin else
4354887Schin {
436*12068SRoger.Faulkner@Oracle.COM if (delimiter > 0)
4374887Schin sfputc(sp, delimiter);
4384887Schin if ((r = sfstrtell(sp) + 1) <= n)
4394887Schin {
4404887Schin if (!(s = sfstruse(sp)))
4414887Schin r = 0;
4424887Schin else
4434887Schin memcpy(buf, s, r);
4444887Schin }
4454887Schin }
4464887Schin sfstrclose(sp);
4474887Schin return r;
4484887Schin }
449