14887Schin /*********************************************************************** 24887Schin * * 34887Schin * This software is part of the ast package * 4*12068SRoger.Faulkner@Oracle.COM * Copyright (c) 1985-2010 AT&T Intellectual Property * 54887Schin * and is licensed under the * 64887Schin * Common Public License, Version 1.0 * 78462SApril.Chin@Sun.COM * by AT&T Intellectual Property * 84887Schin * * 94887Schin * A copy of the License is available at * 104887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 124887Schin * * 134887Schin * Information and Software Systems Research * 144887Schin * AT&T Research * 154887Schin * Florham Park NJ * 164887Schin * * 174887Schin * Glenn Fowler <gsf@research.att.com> * 184887Schin * David Korn <dgk@research.att.com> * 194887Schin * Phong Vo <kpv@research.att.com> * 204887Schin * * 214887Schin ***********************************************************************/ 224887Schin #pragma prototyped 234887Schin 244887Schin /* 254887Schin * posix regex decompiler 264887Schin */ 274887Schin 284887Schin #include "reglib.h" 294887Schin 304887Schin #undef ismeta 314887Schin #define ismeta(c,t,e,d) (state.magic[c] && state.magic[c][(t)+(e)] >= T_META || (c) == (d)) 324887Schin #define meta(f,c,t,e,d) do { if (ismeta(c,t,e,d)) sfputc(f, '\\'); sfputc(f, c); } while (0) 334887Schin 344887Schin static void 354887Schin detrie(Trie_node_t* x, Sfio_t* sp, char* b, char* p, char* e, int delimiter) 364887Schin { 374887Schin register Trie_node_t* y; 384887Schin char* o; 394887Schin int k; 404887Schin 414887Schin o = p; 424887Schin k = 1; 434887Schin do 444887Schin { 454887Schin if (k) 464887Schin { 474887Schin o = p; 484887Schin if (p < e) 494887Schin *p++ = x->c; 504887Schin } 514887Schin sfputc(sp, x->c); 524887Schin for (y = x->sib; y; y = y->sib) 534887Schin { 544887Schin sfputc(sp, '|'); 554887Schin sfputc(sp, '<'); 564887Schin sfwrite(sp, b, p - b); 574887Schin sfputc(sp, '>'); 584887Schin detrie(y, sp, b, p, e, delimiter); 594887Schin } 604887Schin if (x->end && x->son) 614887Schin { 624887Schin sfputc(sp, '|'); 634887Schin sfputc(sp, '{'); 644887Schin sfwrite(sp, b, p - b); 654887Schin sfputc(sp, '}'); 664887Schin p = o; 674887Schin } 684887Schin } while (x = x->son); 694887Schin } 704887Schin 714887Schin static int 724887Schin decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags) 734887Schin { 744887Schin Rex_t* q; 754887Schin unsigned char* s; 764887Schin unsigned char* t; 774887Schin int c; 78*12068SRoger.Faulkner@Oracle.COM int m; 794887Schin int cb; 804887Schin int cd; 814887Schin int cr; 824887Schin int ib; 834887Schin int ie; 844887Schin int nb; 854887Schin int ne; 864887Schin unsigned char ic[2*UCHAR_MAX]; 874887Schin unsigned char nc[2*UCHAR_MAX]; 884887Schin 894887Schin do 904887Schin { 914887Schin switch (e->type) 924887Schin { 934887Schin case REX_ALT: 944887Schin if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags)) 954887Schin return 1; 964887Schin sfputc(sp, '|'); 974887Schin if (e->re.group.expr.binary.right && decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags)) 984887Schin return 1; 994887Schin break; 1004887Schin case REX_BACK: 1014887Schin sfprintf(sp, "\\%d", e->lo); 1024887Schin break; 1034887Schin case REX_BEG: 1044887Schin if (type < SRE) 1054887Schin sfputc(sp, '^'); 1064887Schin break; 1074887Schin case REX_END: 1084887Schin if (type < SRE) 1094887Schin sfputc(sp, '$'); 1104887Schin break; 1114887Schin case REX_WBEG: 1124887Schin meta(sp, '<', type, 1, delimiter); 1134887Schin break; 1144887Schin case REX_WEND: 1154887Schin meta(sp, '<', type, 1, delimiter); 1164887Schin break; 1174887Schin case REX_WORD: 1184887Schin sfprintf(sp, "\\w"); 1194887Schin break; 1204887Schin case REX_CLASS: 1214887Schin case REX_COLL_CLASS: 1224887Schin case REX_ONECHAR: 1234887Schin case REX_DOT: 1244887Schin case REX_REP: 1254887Schin if (type >= SRE) 1264887Schin { 1274887Schin c = ')'; 1284887Schin if (e->hi == RE_DUP_INF) 1294887Schin { 1304887Schin if (!e->lo) 1314887Schin sfputc(sp, '*'); 1324887Schin else if (e->lo == 1) 1334887Schin sfputc(sp, '+'); 1344887Schin else 1354887Schin sfprintf(sp, "{%d,}", e->lo); 1364887Schin } 1374887Schin else if (e->hi != 1) 1384887Schin sfprintf(sp, "{%d,%d}", e->lo, e->hi); 1394887Schin else if (e->lo == 0) 1404887Schin sfputc(sp, '?'); 1414887Schin else 1424887Schin c = 0; 1434887Schin } 1444887Schin switch (e->type) 1454887Schin { 1464887Schin case REX_REP: 1474887Schin if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 1484887Schin return 1; 1494887Schin break; 1504887Schin case REX_CLASS: 1514887Schin sfputc(sp, '['); 1524887Schin nb = ne = ib = ie = -2; 1534887Schin cb = cd = cr = 0; 1544887Schin s = nc; 1554887Schin t = ic; 156*12068SRoger.Faulkner@Oracle.COM for (m = 0; m <= UCHAR_MAX; m++) 157*12068SRoger.Faulkner@Oracle.COM if (settst(e->re.charclass, m)) 1584887Schin { 159*12068SRoger.Faulkner@Oracle.COM if (m == ']') 1604887Schin cb = 1; 161*12068SRoger.Faulkner@Oracle.COM else if (m == '-') 1624887Schin cr = 1; 163*12068SRoger.Faulkner@Oracle.COM else if (m == delimiter) 1644887Schin cd = 1; 1654887Schin else if (nb < 0) 166*12068SRoger.Faulkner@Oracle.COM ne = nb = m; 167*12068SRoger.Faulkner@Oracle.COM else if (ne == (m - 1)) 168*12068SRoger.Faulkner@Oracle.COM ne = m; 1694887Schin else 1704887Schin { 1714887Schin if (ne == nb) 1724887Schin *s++ = ne; 1734887Schin else 1744887Schin { 1754887Schin *s++ = nb; 1764887Schin *s++ = '-'; 1774887Schin *s++ = ne; 1784887Schin } 179*12068SRoger.Faulkner@Oracle.COM ne = nb = m; 1804887Schin } 1814887Schin } 1824887Schin else 1834887Schin { 184*12068SRoger.Faulkner@Oracle.COM if (m == ']') 1854887Schin cb = -1; 186*12068SRoger.Faulkner@Oracle.COM else if (m == '-') 1874887Schin cr = -1; 188*12068SRoger.Faulkner@Oracle.COM else if (m == delimiter) 1894887Schin cd = -1; 1904887Schin else if (ib < 0) 191*12068SRoger.Faulkner@Oracle.COM ie = ib = m; 192*12068SRoger.Faulkner@Oracle.COM else if (ie == (m - 1)) 193*12068SRoger.Faulkner@Oracle.COM ie = m; 1944887Schin else 1954887Schin { 1964887Schin if (ie == ib) 1974887Schin *t++ = ie; 1984887Schin else 1994887Schin { 2004887Schin *t++ = ib; 2014887Schin *t++ = '-'; 2024887Schin *t++ = ie; 2034887Schin } 204*12068SRoger.Faulkner@Oracle.COM ie = ib = m; 2054887Schin } 2064887Schin } 2074887Schin if (nb >= 0) 2084887Schin { 2094887Schin *s++ = nb; 2104887Schin if (ne != nb) 2114887Schin { 2124887Schin *s++ = '-'; 2134887Schin *s++ = ne; 2144887Schin } 2154887Schin } 2164887Schin if (ib >= 0) 2174887Schin { 2184887Schin *t++ = ib; 2194887Schin if (ie != ib) 2204887Schin { 2214887Schin *t++ = '-'; 2224887Schin *t++ = ie; 2234887Schin } 2244887Schin } 2254887Schin if ((t - ic + 1) < (s - nc + (nc[0] == '^'))) 2264887Schin { 2274887Schin sfputc(sp, '^'); 2284887Schin if (cb < 0) 2294887Schin sfputc(sp, ']'); 2304887Schin if (cr < 0) 2314887Schin sfputc(sp, '-'); 232*12068SRoger.Faulkner@Oracle.COM if (cd < 0 && delimiter > 0) 2334887Schin { 2344887Schin if (flags & REG_ESCAPE) 2354887Schin sfputc(sp, '\\'); 2364887Schin sfputc(sp, delimiter); 2374887Schin } 2384887Schin sfwrite(sp, ic, t - ic); 2394887Schin } 2404887Schin else 2414887Schin { 2424887Schin if (cb > 0) 2434887Schin sfputc(sp, ']'); 2444887Schin if (cr > 0) 2454887Schin sfputc(sp, '-'); 246*12068SRoger.Faulkner@Oracle.COM if (cd > 0 && delimiter > 0) 2474887Schin { 2484887Schin if (flags & REG_ESCAPE) 2494887Schin sfputc(sp, '\\'); 2504887Schin sfputc(sp, delimiter); 2514887Schin } 2524887Schin if (nc[0] == '^') 2534887Schin { 2544887Schin sfwrite(sp, nc + 1, s - nc - 1); 2554887Schin sfputc(sp, '^'); 2564887Schin } 2574887Schin else 2584887Schin sfwrite(sp, nc, s - nc); 2594887Schin } 2604887Schin sfputc(sp, ']'); 2614887Schin break; 2624887Schin case REX_COLL_CLASS: 2634887Schin break; 2644887Schin case REX_ONECHAR: 2654887Schin meta(sp, e->re.onechar, type, 0, delimiter); 2664887Schin break; 2674887Schin case REX_DOT: 2684887Schin sfputc(sp, '.'); 2694887Schin break; 2704887Schin } 2714887Schin if (type < SRE) 2724887Schin { 2734887Schin if (e->hi == RE_DUP_INF) 2744887Schin { 2754887Schin if (!e->lo) 2764887Schin sfputc(sp, '*'); 2774887Schin else if (e->lo == 1 && ismeta('+', type, 0, delimiter)) 2784887Schin meta(sp, '+', type, 1, delimiter); 2794887Schin else 2804887Schin { 2814887Schin meta(sp, '{', type, 1, delimiter); 2824887Schin sfprintf(sp, "%d,", e->lo); 2834887Schin meta(sp, '}', type, 1, delimiter); 2844887Schin } 2854887Schin } 2864887Schin else if (e->hi != 1 || e->lo == 0 && !ismeta('?', type, 0, delimiter)) 2874887Schin { 2884887Schin meta(sp, '{', type, 1, delimiter); 2894887Schin sfprintf(sp, "%d,%d", e->lo, e->hi); 2904887Schin meta(sp, '}', type, 1, delimiter); 2914887Schin } 2924887Schin else if (e->lo == 0) 2934887Schin meta(sp, '?', type, 1, delimiter); 2944887Schin } 2954887Schin else if (c) 2964887Schin sfputc(sp, c); 2974887Schin break; 2984887Schin case REX_STRING: 2994887Schin case REX_KMP: 3004887Schin t = (s = e->re.string.base) + e->re.string.size; 3014887Schin while (s < t) 3024887Schin { 3034887Schin c = *s++; 3044887Schin meta(sp, c, type, 0, delimiter); 3054887Schin } 3064887Schin break; 3074887Schin case REX_TRIE: 3084887Schin ib = 0; 3094887Schin for (c = 0; c <= UCHAR_MAX; c++) 3104887Schin if (e->re.trie.root[c]) 3114887Schin { 3124887Schin char pfx[1024]; 3134887Schin 3144887Schin if (ib) 3154887Schin sfputc(sp, '|'); 3164887Schin else 3174887Schin ib = 1; 3184887Schin detrie(e->re.trie.root[c], sp, pfx, pfx, &pfx[sizeof(pfx)], delimiter); 3194887Schin } 3204887Schin break; 3214887Schin case REX_NEG: 3224887Schin if (type >= SRE) 3234887Schin sfprintf(sp, "!("); 3244887Schin if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 3254887Schin return 1; 3264887Schin if (type >= SRE) 3274887Schin sfputc(sp, ')'); 3284887Schin else 3294887Schin sfputc(sp, '!'); 3304887Schin break; 3314887Schin case REX_CONJ: 3324887Schin if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags)) 3334887Schin return 1; 3344887Schin sfputc(sp, '&'); 3354887Schin if (decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags)) 3364887Schin return 1; 3374887Schin break; 3384887Schin case REX_GROUP: 3394887Schin if (type >= SRE) 3404887Schin sfputc(sp, '@'); 3414887Schin meta(sp, '(', type, 1, delimiter); 3424887Schin if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 3434887Schin return 1; 3444887Schin meta(sp, ')', type, 1, delimiter); 3454887Schin break; 3464887Schin case REX_GROUP_AHEAD: 3474887Schin case REX_GROUP_AHEAD_NOT: 3484887Schin case REX_GROUP_BEHIND: 3494887Schin case REX_GROUP_BEHIND_NOT: 3504887Schin meta(sp, '(', type, 1, delimiter); 3514887Schin sfputc(sp, '?'); 3524887Schin if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 3534887Schin return 1; 3544887Schin meta(sp, ')', type, 1, delimiter); 3554887Schin break; 3564887Schin case REX_GROUP_COND: 3574887Schin meta(sp, '(', type, 1, delimiter); 3584887Schin sfputc(sp, '?'); 3594887Schin if (e->re.group.expr.binary.left && decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags)) 3604887Schin return 1; 3614887Schin if (q = e->re.group.expr.binary.right) 3624887Schin { 3634887Schin sfputc(sp, ':'); 3644887Schin if (q->re.group.expr.binary.left && decomp(q->re.group.expr.binary.left, sp, type, delimiter, flags)) 3654887Schin return 1; 3664887Schin sfputc(sp, ':'); 3674887Schin if (q->re.group.expr.binary.right && decomp(q->re.group.expr.binary.right, sp, type, delimiter, flags)) 3684887Schin return 1; 3694887Schin } 3704887Schin meta(sp, ')', type, 1, delimiter); 3714887Schin break; 3724887Schin case REX_GROUP_CUT: 3734887Schin meta(sp, '(', type, 1, delimiter); 3744887Schin sfputc(sp, '?'); 3754887Schin if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 3764887Schin return 1; 3774887Schin meta(sp, ')', type, 1, delimiter); 3784887Schin break; 3794887Schin case REX_BM: 3804887Schin break; 3814887Schin default: 3824887Schin sfprintf(sp, "<ERROR:REX_%d>", e->type); 3834887Schin break; 3844887Schin } 3854887Schin } while (e = e->next); 3864887Schin return 0; 3874887Schin } 3884887Schin 3894887Schin /* 3904887Schin * reconstruct pattern from compiled re p into sp 3914887Schin */ 3924887Schin 3934887Schin size_t 3944887Schin regdecomp(regex_t* p, regflags_t flags, char* buf, size_t n) 3954887Schin { 3964887Schin Sfio_t* sp; 3974887Schin char* s; 3984887Schin int type; 3994887Schin int delimiter; 4004887Schin size_t r; 4014887Schin 4024887Schin if (!(sp = sfstropen())) 4034887Schin return 0; 404*12068SRoger.Faulkner@Oracle.COM if (flags == (regflags_t)~0) 4054887Schin flags = p->env->flags; 4064887Schin switch (flags & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL)) 4074887Schin { 4084887Schin case 0: 4094887Schin type = BRE; 4104887Schin break; 4114887Schin case REG_AUGMENTED: 4124887Schin case REG_AUGMENTED|REG_EXTENDED: 4134887Schin type = ARE; 4144887Schin break; 4154887Schin case REG_EXTENDED: 4164887Schin type = ERE; 4174887Schin break; 4184887Schin case REG_SHELL: 4194887Schin type = SRE; 4204887Schin break; 4214887Schin default: 4224887Schin type = KRE; 4234887Schin break; 4244887Schin } 4254887Schin if (flags & REG_DELIMITED) 4264887Schin { 4274887Schin delimiter = '/'; 4284887Schin sfputc(sp, delimiter); 4294887Schin } 4304887Schin else 431*12068SRoger.Faulkner@Oracle.COM delimiter = -1; 4324887Schin if (decomp(p->env->rex, sp, type, delimiter, flags)) 4334887Schin r = 0; 4344887Schin else 4354887Schin { 436*12068SRoger.Faulkner@Oracle.COM if (delimiter > 0) 4374887Schin sfputc(sp, delimiter); 4384887Schin if ((r = sfstrtell(sp) + 1) <= n) 4394887Schin { 4404887Schin if (!(s = sfstruse(sp))) 4414887Schin r = 0; 4424887Schin else 4434887Schin memcpy(buf, s, r); 4444887Schin } 4454887Schin } 4464887Schin sfstrclose(sp); 4474887Schin return r; 4484887Schin } 449