1*4887Schin /*********************************************************************** 2*4887Schin * * 3*4887Schin * This software is part of the ast package * 4*4887Schin * Copyright (c) 1985-2007 AT&T Knowledge Ventures * 5*4887Schin * and is licensed under the * 6*4887Schin * Common Public License, Version 1.0 * 7*4887Schin * by AT&T Knowledge Ventures * 8*4887Schin * * 9*4887Schin * A copy of the License is available at * 10*4887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 11*4887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12*4887Schin * * 13*4887Schin * Information and Software Systems Research * 14*4887Schin * AT&T Research * 15*4887Schin * Florham Park NJ * 16*4887Schin * * 17*4887Schin * Glenn Fowler <gsf@research.att.com> * 18*4887Schin * David Korn <dgk@research.att.com> * 19*4887Schin * Phong Vo <kpv@research.att.com> * 20*4887Schin * * 21*4887Schin ***********************************************************************/ 22*4887Schin #pragma prototyped 23*4887Schin 24*4887Schin /* 25*4887Schin * posix regex ed(1) style substitute compile 26*4887Schin */ 27*4887Schin 28*4887Schin #include "reglib.h" 29*4887Schin 30*4887Schin static const regflags_t submap[] = 31*4887Schin { 32*4887Schin 'g', REG_SUB_ALL, 33*4887Schin 'l', REG_SUB_LOWER, 34*4887Schin 'n', REG_SUB_NUMBER, 35*4887Schin 'p', REG_SUB_PRINT, 36*4887Schin 's', REG_SUB_STOP, 37*4887Schin 'u', REG_SUB_UPPER, 38*4887Schin 'w', REG_SUB_WRITE|REG_SUB_LAST, 39*4887Schin 0, 0 40*4887Schin }; 41*4887Schin 42*4887Schin int 43*4887Schin regsubflags(regex_t* p, register const char* s, char** e, int delim, register const regflags_t* map, int* pm, regflags_t* pf) 44*4887Schin { 45*4887Schin register int c; 46*4887Schin register const regflags_t* m; 47*4887Schin regflags_t flags; 48*4887Schin int minmatch; 49*4887Schin regdisc_t* disc; 50*4887Schin 51*4887Schin flags = pf ? *pf : 0; 52*4887Schin minmatch = pm ? *pm : 0; 53*4887Schin if (!map) 54*4887Schin map = submap; 55*4887Schin while (!(flags & REG_SUB_LAST)) 56*4887Schin { 57*4887Schin if (!(c = *s++) || c == delim) 58*4887Schin { 59*4887Schin s--; 60*4887Schin break; 61*4887Schin } 62*4887Schin else if (c >= '0' && c <= '9') 63*4887Schin { 64*4887Schin if (minmatch) 65*4887Schin { 66*4887Schin disc = p->env->disc; 67*4887Schin regfree(p); 68*4887Schin return fatal(disc, REG_EFLAGS, s - 1); 69*4887Schin } 70*4887Schin minmatch = c - '0'; 71*4887Schin while (*s >= '0' && *s <= '9') 72*4887Schin minmatch = minmatch * 10 + *s++ - '0'; 73*4887Schin } 74*4887Schin else 75*4887Schin { 76*4887Schin for (m = map; *m; m++) 77*4887Schin if (*m++ == c) 78*4887Schin { 79*4887Schin if (flags & *m) 80*4887Schin { 81*4887Schin disc = p->env->disc; 82*4887Schin regfree(p); 83*4887Schin return fatal(disc, REG_EFLAGS, s - 1); 84*4887Schin } 85*4887Schin flags |= *m--; 86*4887Schin break; 87*4887Schin } 88*4887Schin if (!*m) 89*4887Schin { 90*4887Schin s--; 91*4887Schin break; 92*4887Schin } 93*4887Schin } 94*4887Schin } 95*4887Schin if (pf) 96*4887Schin *pf = flags; 97*4887Schin if (pm) 98*4887Schin *pm = minmatch; 99*4887Schin if (e) 100*4887Schin *e = (char*)s; 101*4887Schin return 0; 102*4887Schin } 103*4887Schin 104*4887Schin /* 105*4887Schin * compile substitute rhs and optional flags 106*4887Schin */ 107*4887Schin 108*4887Schin int 109*4887Schin regsubcomp(regex_t* p, register const char* s, const regflags_t* map, int minmatch, regflags_t flags) 110*4887Schin { 111*4887Schin register regsub_t* sub; 112*4887Schin register int c; 113*4887Schin register int d; 114*4887Schin register char* t; 115*4887Schin register regsubop_t* op; 116*4887Schin char* e; 117*4887Schin const char* r; 118*4887Schin int sre; 119*4887Schin int f; 120*4887Schin int g; 121*4887Schin int n; 122*4887Schin int nops; 123*4887Schin const char* o; 124*4887Schin regdisc_t* disc; 125*4887Schin 126*4887Schin disc = p->env->disc; 127*4887Schin if (p->env->flags & REG_NOSUB) 128*4887Schin { 129*4887Schin regfree(p); 130*4887Schin return fatal(disc, REG_BADPAT, NiL); 131*4887Schin } 132*4887Schin if (!(sub = (regsub_t*)alloc(p->env->disc, 0, sizeof(regsub_t) + strlen(s))) || !(sub->re_ops = (regsubop_t*)alloc(p->env->disc, 0, (nops = 8) * sizeof(regsubop_t)))) 133*4887Schin { 134*4887Schin if (sub) 135*4887Schin alloc(p->env->disc, sub, 0); 136*4887Schin regfree(p); 137*4887Schin return fatal(disc, REG_ESPACE, s); 138*4887Schin } 139*4887Schin sub->re_buf = sub->re_end = 0; 140*4887Schin p->re_sub = sub; 141*4887Schin p->env->sub = 1; 142*4887Schin op = sub->re_ops; 143*4887Schin o = s; 144*4887Schin if (!(p->env->flags & REG_DELIMITED)) 145*4887Schin d = 0; 146*4887Schin else 147*4887Schin switch (d = *(s - 1)) 148*4887Schin { 149*4887Schin case '\\': 150*4887Schin case '\n': 151*4887Schin case '\r': 152*4887Schin regfree(p); 153*4887Schin return fatal(disc, REG_EDELIM, s); 154*4887Schin } 155*4887Schin sre = p->env->flags & REG_SHELL; 156*4887Schin t = sub->re_rhs; 157*4887Schin if (d) 158*4887Schin { 159*4887Schin r = s; 160*4887Schin for (;;) 161*4887Schin { 162*4887Schin if (!*s) 163*4887Schin { 164*4887Schin if (p->env->flags & REG_MUSTDELIM) 165*4887Schin { 166*4887Schin regfree(p); 167*4887Schin return fatal(disc, REG_EDELIM, r); 168*4887Schin } 169*4887Schin break; 170*4887Schin } 171*4887Schin else if (*s == d) 172*4887Schin { 173*4887Schin flags |= REG_SUB_FULL; 174*4887Schin s++; 175*4887Schin break; 176*4887Schin } 177*4887Schin else if (*s++ == '\\' && !*s++) 178*4887Schin { 179*4887Schin regfree(p); 180*4887Schin return fatal(disc, REG_EESCAPE, r); 181*4887Schin } 182*4887Schin } 183*4887Schin if (*s) 184*4887Schin { 185*4887Schin if (n = regsubflags(p, s, &e, d, map, &minmatch, &flags)) 186*4887Schin return n; 187*4887Schin s = (const char*)e; 188*4887Schin } 189*4887Schin p->re_npat = s - o; 190*4887Schin s = r; 191*4887Schin } 192*4887Schin else 193*4887Schin p->re_npat = 0; 194*4887Schin op->op = f = g = flags & (REG_SUB_LOWER|REG_SUB_UPPER); 195*4887Schin op->off = 0; 196*4887Schin while ((c = *s++) != d) 197*4887Schin { 198*4887Schin again: 199*4887Schin if (!c) 200*4887Schin { 201*4887Schin p->re_npat = s - o - 1; 202*4887Schin break; 203*4887Schin } 204*4887Schin else if (c == '~') 205*4887Schin { 206*4887Schin if (!sre || *s != '(') 207*4887Schin { 208*4887Schin *t++ = c; 209*4887Schin continue; 210*4887Schin } 211*4887Schin r = s - 1; 212*4887Schin s++; 213*4887Schin c = *s++; 214*4887Schin } 215*4887Schin else if (c == '\\') 216*4887Schin { 217*4887Schin if (*s == c) 218*4887Schin { 219*4887Schin *t++ = *s++; 220*4887Schin continue; 221*4887Schin } 222*4887Schin if ((c = *s++) == d) 223*4887Schin goto again; 224*4887Schin if (!c) 225*4887Schin { 226*4887Schin regfree(p); 227*4887Schin return fatal(disc, REG_EESCAPE, s - 2); 228*4887Schin } 229*4887Schin if (sre) 230*4887Schin { 231*4887Schin *t++ = chresc(s - 2, &e); 232*4887Schin s = (const char*)e; 233*4887Schin continue; 234*4887Schin } 235*4887Schin if (c == '&') 236*4887Schin { 237*4887Schin *t++ = c; 238*4887Schin continue; 239*4887Schin } 240*4887Schin } 241*4887Schin else if (c == '&') 242*4887Schin { 243*4887Schin if (sre) 244*4887Schin { 245*4887Schin *t++ = c; 246*4887Schin continue; 247*4887Schin } 248*4887Schin } 249*4887Schin else 250*4887Schin { 251*4887Schin switch (op->op) 252*4887Schin { 253*4887Schin case REG_SUB_UPPER: 254*4887Schin if (islower(c)) 255*4887Schin c = toupper(c); 256*4887Schin break; 257*4887Schin case REG_SUB_LOWER: 258*4887Schin if (isupper(c)) 259*4887Schin c = tolower(c); 260*4887Schin break; 261*4887Schin case REG_SUB_UPPER|REG_SUB_LOWER: 262*4887Schin if (isupper(c)) 263*4887Schin c = tolower(c); 264*4887Schin else if (islower(c)) 265*4887Schin c = toupper(c); 266*4887Schin break; 267*4887Schin } 268*4887Schin *t++ = c; 269*4887Schin continue; 270*4887Schin } 271*4887Schin switch (c) 272*4887Schin { 273*4887Schin case 0: 274*4887Schin s--; 275*4887Schin continue; 276*4887Schin case '&': 277*4887Schin c = 0; 278*4887Schin break; 279*4887Schin case '0': case '1': case '2': case '3': case '4': 280*4887Schin case '5': case '6': case '7': case '8': case '9': 281*4887Schin c -= '0'; 282*4887Schin if (sre) 283*4887Schin while (isdigit(*s)) 284*4887Schin c = c * 10 + *s++ - '0'; 285*4887Schin else if (isdigit(*s) && (p->env->flags & REG_MULTIREF)) 286*4887Schin c = c * 10 + *s++ - '0'; 287*4887Schin break; 288*4887Schin case 'l': 289*4887Schin if (sre) 290*4887Schin { 291*4887Schin if (*s != ')') 292*4887Schin { 293*4887Schin c = -1; 294*4887Schin break; 295*4887Schin } 296*4887Schin s++; 297*4887Schin } 298*4887Schin if (c = *s) 299*4887Schin { 300*4887Schin s++; 301*4887Schin if (isupper(c)) 302*4887Schin c = tolower(c); 303*4887Schin *t++ = c; 304*4887Schin } 305*4887Schin continue; 306*4887Schin case 'u': 307*4887Schin if (sre) 308*4887Schin { 309*4887Schin if (*s != ')') 310*4887Schin { 311*4887Schin c = -1; 312*4887Schin break; 313*4887Schin } 314*4887Schin s++; 315*4887Schin } 316*4887Schin if (c = *s) 317*4887Schin { 318*4887Schin s++; 319*4887Schin if (islower(c)) 320*4887Schin c = toupper(c); 321*4887Schin *t++ = c; 322*4887Schin } 323*4887Schin continue; 324*4887Schin case 'E': 325*4887Schin if (sre) 326*4887Schin { 327*4887Schin if (*s != ')') 328*4887Schin { 329*4887Schin c = -1; 330*4887Schin break; 331*4887Schin } 332*4887Schin s++; 333*4887Schin } 334*4887Schin f = g; 335*4887Schin set: 336*4887Schin if ((op->len = (t - sub->re_rhs) - op->off) && (n = ++op - sub->re_ops) >= nops) 337*4887Schin { 338*4887Schin if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t)))) 339*4887Schin { 340*4887Schin regfree(p); 341*4887Schin return fatal(disc, REG_ESPACE, NiL); 342*4887Schin } 343*4887Schin op = sub->re_ops + n; 344*4887Schin } 345*4887Schin op->op = f; 346*4887Schin op->off = t - sub->re_rhs; 347*4887Schin continue; 348*4887Schin case 'L': 349*4887Schin if (sre) 350*4887Schin { 351*4887Schin if (*s != ')') 352*4887Schin { 353*4887Schin c = -1; 354*4887Schin break; 355*4887Schin } 356*4887Schin s++; 357*4887Schin } 358*4887Schin g = f; 359*4887Schin f = REG_SUB_LOWER; 360*4887Schin goto set; 361*4887Schin case 'U': 362*4887Schin if (sre) 363*4887Schin { 364*4887Schin if (*s != ')') 365*4887Schin { 366*4887Schin c = -1; 367*4887Schin break; 368*4887Schin } 369*4887Schin s++; 370*4887Schin } 371*4887Schin g = f; 372*4887Schin f = REG_SUB_UPPER; 373*4887Schin goto set; 374*4887Schin default: 375*4887Schin if (!sre) 376*4887Schin { 377*4887Schin *t++ = chresc(s - 2, &e); 378*4887Schin s = (const char*)e; 379*4887Schin continue; 380*4887Schin } 381*4887Schin s--; 382*4887Schin c = -1; 383*4887Schin break; 384*4887Schin } 385*4887Schin if (sre) 386*4887Schin { 387*4887Schin if (c < 0 || *s != ')') 388*4887Schin { 389*4887Schin while (r < s) 390*4887Schin *t++ = *r++; 391*4887Schin continue; 392*4887Schin } 393*4887Schin s++; 394*4887Schin } 395*4887Schin if (c > p->re_nsub) 396*4887Schin { 397*4887Schin regfree(p); 398*4887Schin return fatal(disc, REG_ESUBREG, s - 1); 399*4887Schin } 400*4887Schin if ((n = op - sub->re_ops) >= (nops - 2)) 401*4887Schin { 402*4887Schin if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t)))) 403*4887Schin { 404*4887Schin regfree(p); 405*4887Schin return fatal(disc, REG_ESPACE, NiL); 406*4887Schin } 407*4887Schin op = sub->re_ops + n; 408*4887Schin } 409*4887Schin if (op->len = (t - sub->re_rhs) - op->off) 410*4887Schin op++; 411*4887Schin op->op = f; 412*4887Schin op->off = c; 413*4887Schin op->len = 0; 414*4887Schin op++; 415*4887Schin op->op = f; 416*4887Schin op->off = t - sub->re_rhs; 417*4887Schin } 418*4887Schin if ((op->len = (t - sub->re_rhs) - op->off) && (n = ++op - sub->re_ops) >= nops) 419*4887Schin { 420*4887Schin if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t)))) 421*4887Schin { 422*4887Schin regfree(p); 423*4887Schin return fatal(disc, REG_ESPACE, NiL); 424*4887Schin } 425*4887Schin op = sub->re_ops + n; 426*4887Schin } 427*4887Schin op->len = -1; 428*4887Schin sub->re_flags = flags; 429*4887Schin sub->re_min = minmatch; 430*4887Schin return 0; 431*4887Schin } 432*4887Schin 433*4887Schin void 434*4887Schin regsubfree(regex_t* p) 435*4887Schin { 436*4887Schin Env_t* env; 437*4887Schin regsub_t* sub; 438*4887Schin 439*4887Schin if (p && (env = p->env) && env->sub && (sub = p->re_sub)) 440*4887Schin { 441*4887Schin env->sub = 0; 442*4887Schin p->re_sub = 0; 443*4887Schin if (!(env->disc->re_flags & REG_NOFREE)) 444*4887Schin { 445*4887Schin if (sub->re_buf) 446*4887Schin alloc(env->disc, sub->re_buf, 0); 447*4887Schin if (sub->re_ops) 448*4887Schin alloc(env->disc, sub->re_ops, 0); 449*4887Schin alloc(env->disc, sub, 0); 450*4887Schin } 451*4887Schin } 452*4887Schin } 453