1*55845Sbostic /*- 2*55845Sbostic * Copyright (c) 1992 Henry Spencer. 3*55845Sbostic * Copyright (c) 1992 The Regents of the University of California. 4*55845Sbostic * All rights reserved. 5*55845Sbostic * 6*55845Sbostic * This code is derived from software contributed to Berkeley by 7*55845Sbostic * Henry Spencer of the University of Toronto. 8*55845Sbostic * 9*55845Sbostic * %sccs.include.redist.c% 10*55845Sbostic * 11*55845Sbostic * @(#)engine.c 5.1 (Berkeley) 08/06/92 12*55845Sbostic */ 13*55845Sbostic 14*55845Sbostic /* 15*55845Sbostic * The matching engine and friends. This file is #included by regexec.c 16*55845Sbostic * after suitable #defines of a variety of macros used herein, so that 17*55845Sbostic * different state representations can be used without duplicating masses 18*55845Sbostic * of code. 19*55845Sbostic */ 20*55845Sbostic 21*55845Sbostic #ifdef SNAMES 22*55845Sbostic #define matcher smatcher 23*55845Sbostic #define fast sfast 24*55845Sbostic #define slow sslow 25*55845Sbostic #define dissect sdissect 26*55845Sbostic #define backref sbackref 27*55845Sbostic #define expand sexpand 28*55845Sbostic #define step sstep 29*55845Sbostic #define print sprint 30*55845Sbostic #define match smat 31*55845Sbostic #endif 32*55845Sbostic #ifdef LNAMES 33*55845Sbostic #define matcher lmatcher 34*55845Sbostic #define fast lfast 35*55845Sbostic #define slow lslow 36*55845Sbostic #define dissect ldissect 37*55845Sbostic #define backref lbackref 38*55845Sbostic #define expand lexpand 39*55845Sbostic #define step lstep 40*55845Sbostic #define print lprint 41*55845Sbostic #define match lmat 42*55845Sbostic #endif 43*55845Sbostic 44*55845Sbostic /* another structure passed up and down to avoid zillions of parameters */ 45*55845Sbostic struct match { 46*55845Sbostic struct re_guts *g; 47*55845Sbostic int eflags; 48*55845Sbostic regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 49*55845Sbostic uchar *offp; /* offsets work from here */ 50*55845Sbostic uchar *beginp; /* start of string -- virtual NUL precedes */ 51*55845Sbostic uchar *endp; /* end of string -- virtual NUL here */ 52*55845Sbostic uchar *coldp; /* can be no match starting before here */ 53*55845Sbostic uchar **lastpos; /* [nplus+1] */ 54*55845Sbostic STATEVARS; 55*55845Sbostic states st; /* current states */ 56*55845Sbostic states fresh; /* states for a fresh start */ 57*55845Sbostic states tmp; /* temporary */ 58*55845Sbostic states empty; /* empty set of states */ 59*55845Sbostic }; 60*55845Sbostic 61*55845Sbostic #ifndef NDEBUG 62*55845Sbostic STATIC void print(); 63*55845Sbostic extern char *regchar(); 64*55845Sbostic #define SP(t, s, c) { if (m->eflags®_TRACE) print(m->g, t, s, c, stdout); } 65*55845Sbostic #define DO(t, p1, p2, s1, s2) { if (m->eflags®_TRACE) { \ 66*55845Sbostic printf("%s %s-", t, regchar(*(p1))); \ 67*55845Sbostic printf("%s ", regchar(*(p2))); \ 68*55845Sbostic printf("%ld-%ld\n", s1, s2); } } 69*55845Sbostic #else 70*55845Sbostic #define SP(t, s, c) /* nothing */ 71*55845Sbostic #define DO(t, p1, p2, s1, s2) /* nothing */ 72*55845Sbostic #endif 73*55845Sbostic 74*55845Sbostic STATIC uchar *fast(); 75*55845Sbostic STATIC uchar *slow(); 76*55845Sbostic STATIC uchar *dissect(); 77*55845Sbostic STATIC uchar *backref(); 78*55845Sbostic STATIC states expand(); 79*55845Sbostic STATIC states step(); 80*55845Sbostic 81*55845Sbostic /* 82*55845Sbostic - matcher - the actual matching engine 83*55845Sbostic */ 84*55845Sbostic static int /* 0 success, REG_NOMATCH failure */ 85*55845Sbostic matcher(g, string, nmatch, pmatch, eflags) 86*55845Sbostic register struct re_guts *g; 87*55845Sbostic uchar *string; 88*55845Sbostic size_t nmatch; 89*55845Sbostic regmatch_t pmatch[]; 90*55845Sbostic int eflags; 91*55845Sbostic { 92*55845Sbostic register uchar *endp; 93*55845Sbostic register int i; 94*55845Sbostic struct match mv; 95*55845Sbostic register struct match *m = &mv; 96*55845Sbostic register uchar *dp; 97*55845Sbostic const register sopno gf = g->firststate+1; /* +1 for OEND */ 98*55845Sbostic const register sopno gl = g->laststate; 99*55845Sbostic uchar *start; 100*55845Sbostic uchar *stop; 101*55845Sbostic 102*55845Sbostic if (g->cflags®_NOSUB) 103*55845Sbostic nmatch = 0; /* simplify tests */ 104*55845Sbostic if (eflags®_STARTEND) { 105*55845Sbostic start = string + pmatch[0].rm_so; 106*55845Sbostic stop = string + pmatch[0].rm_eo; 107*55845Sbostic } else { 108*55845Sbostic start = string; 109*55845Sbostic stop = start + strlen((char *)start); 110*55845Sbostic } 111*55845Sbostic 112*55845Sbostic /* match struct setup */ 113*55845Sbostic m->g = g; 114*55845Sbostic m->eflags = eflags; 115*55845Sbostic m->pmatch = NULL; 116*55845Sbostic m->lastpos = NULL; 117*55845Sbostic m->offp = string; 118*55845Sbostic m->beginp = start; 119*55845Sbostic m->endp = stop; 120*55845Sbostic STATESETUP(m, 4); 121*55845Sbostic SETUP(m->st); 122*55845Sbostic SETUP(m->fresh); 123*55845Sbostic SETUP(m->tmp); 124*55845Sbostic SETUP(m->empty); 125*55845Sbostic CLEAR(m->empty); 126*55845Sbostic 127*55845Sbostic /* this loop does only one repetition except for backrefs */ 128*55845Sbostic for (;;) { 129*55845Sbostic endp = fast(m, start, stop, gf, gl); 130*55845Sbostic if (endp == NULL) { /* a miss */ 131*55845Sbostic STATETEARDOWN(m); 132*55845Sbostic return(REG_NOMATCH); 133*55845Sbostic } 134*55845Sbostic if (nmatch == 0 && !g->backrefs) 135*55845Sbostic break; /* no further info needed */ 136*55845Sbostic 137*55845Sbostic /* where? */ 138*55845Sbostic assert(m->coldp != NULL); 139*55845Sbostic for (;;) { 140*55845Sbostic endp = slow(m, m->coldp, stop, gf, gl); 141*55845Sbostic if (endp != NULL) 142*55845Sbostic break; 143*55845Sbostic assert(*m->coldp != '\0'); 144*55845Sbostic m->coldp++; 145*55845Sbostic } 146*55845Sbostic if (nmatch == 1 && !g->backrefs) 147*55845Sbostic break; /* no further info needed */ 148*55845Sbostic 149*55845Sbostic /* oh my, he wants the subexpressions... */ 150*55845Sbostic if (m->pmatch == NULL) 151*55845Sbostic m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 152*55845Sbostic sizeof(regmatch_t)); 153*55845Sbostic if (m->pmatch == NULL) { 154*55845Sbostic STATETEARDOWN(m); 155*55845Sbostic return(REG_ESPACE); 156*55845Sbostic } 157*55845Sbostic for (i = 1; i <= m->g->nsub; i++) { 158*55845Sbostic m->pmatch[i].rm_so = -1; 159*55845Sbostic m->pmatch[i].rm_eo = -1; 160*55845Sbostic } 161*55845Sbostic if (!g->backrefs) 162*55845Sbostic dp = dissect(m, m->coldp, endp, gf, gl); 163*55845Sbostic else { 164*55845Sbostic if (g->nplus > 0 && m->lastpos == NULL) 165*55845Sbostic m->lastpos = (uchar **)malloc((g->nplus+1) * 166*55845Sbostic sizeof(uchar *)); 167*55845Sbostic if (g->nplus > 0 && m->lastpos == NULL) { 168*55845Sbostic free(m->pmatch); 169*55845Sbostic STATETEARDOWN(m); 170*55845Sbostic return(REG_ESPACE); 171*55845Sbostic } 172*55845Sbostic dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 173*55845Sbostic } 174*55845Sbostic if (dp != NULL) 175*55845Sbostic break; 176*55845Sbostic 177*55845Sbostic /* uh-oh... we couldn't find a subexpression-level match */ 178*55845Sbostic assert(g->backrefs); /* must be back references doing it */ 179*55845Sbostic assert(g->nplus == 0 || m->lastpos != NULL); 180*55845Sbostic while (dp == NULL && endp > m->coldp && 181*55845Sbostic (endp = slow(m, m->coldp, endp-1, gf, gl)) != NULL) { 182*55845Sbostic /* try it on a shorter possibility */ 183*55845Sbostic for (i = 1; i <= m->g->nsub; i++) { 184*55845Sbostic m->pmatch[i].rm_so = -1; 185*55845Sbostic m->pmatch[i].rm_eo = -1; 186*55845Sbostic } 187*55845Sbostic dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 188*55845Sbostic } 189*55845Sbostic assert(dp == NULL || dp == endp); 190*55845Sbostic if (dp != NULL) /* found a shorter one */ 191*55845Sbostic break; 192*55845Sbostic 193*55845Sbostic /* despite initial appearances, there is no match here */ 194*55845Sbostic start = m->coldp + 1; /* recycle starting later */ 195*55845Sbostic assert(start <= stop); 196*55845Sbostic } 197*55845Sbostic 198*55845Sbostic /* fill in the details if requested */ 199*55845Sbostic if (nmatch > 0) { 200*55845Sbostic pmatch[0].rm_so = m->coldp - m->offp; 201*55845Sbostic pmatch[0].rm_eo = endp - m->offp; 202*55845Sbostic } 203*55845Sbostic if (nmatch > 1) { 204*55845Sbostic assert(m->pmatch != NULL); 205*55845Sbostic for (i = 1; i < nmatch; i++) 206*55845Sbostic if (i <= m->g->nsub) 207*55845Sbostic pmatch[i] = m->pmatch[i]; 208*55845Sbostic else { 209*55845Sbostic pmatch[i].rm_so = -1; 210*55845Sbostic pmatch[i].rm_eo = -1; 211*55845Sbostic } 212*55845Sbostic } 213*55845Sbostic 214*55845Sbostic if (m->pmatch != NULL) 215*55845Sbostic free((char *)m->pmatch); 216*55845Sbostic if (m->lastpos != NULL) 217*55845Sbostic free((char *)m->lastpos); 218*55845Sbostic STATETEARDOWN(m); 219*55845Sbostic return(0); 220*55845Sbostic } 221*55845Sbostic 222*55845Sbostic /* 223*55845Sbostic - dissect - figure out what matched what, no back references 224*55845Sbostic */ 225*55845Sbostic static uchar * /* == stop (success) always */ 226*55845Sbostic dissect(m, start, stop, startst, stopst) 227*55845Sbostic register struct match *m; 228*55845Sbostic uchar *start; 229*55845Sbostic uchar *stop; 230*55845Sbostic sopno startst; 231*55845Sbostic sopno stopst; 232*55845Sbostic { 233*55845Sbostic register int i; 234*55845Sbostic register sopno ss; /* start sop of current subRE */ 235*55845Sbostic register sopno es; /* end sop of current subRE */ 236*55845Sbostic register uchar *sp; /* start of string matched by it */ 237*55845Sbostic register uchar *stp; /* string matched by it cannot pass here */ 238*55845Sbostic register uchar *rest; /* start of rest of string */ 239*55845Sbostic register uchar *tail; /* string unmatched by rest of RE */ 240*55845Sbostic register sopno ssub; /* start sop of subsubRE */ 241*55845Sbostic register sopno esub; /* end sop of subsubRE */ 242*55845Sbostic register uchar *ssp; /* start of string matched by subsubRE */ 243*55845Sbostic register uchar *sep; /* end of string matched by subsubRE */ 244*55845Sbostic register uchar *oldssp; /* previous ssp */ 245*55845Sbostic register uchar *dp; 246*55845Sbostic register size_t len; 247*55845Sbostic 248*55845Sbostic DO("diss", start, stop, startst, stopst); 249*55845Sbostic sp = start; 250*55845Sbostic for (ss = startst; ss < stopst; ss = es) { 251*55845Sbostic /* identify end of subRE */ 252*55845Sbostic es = ss; 253*55845Sbostic switch (OP(m->g->strip[es])) { 254*55845Sbostic case OPLUS_: 255*55845Sbostic case OQUEST_: 256*55845Sbostic es += OPND(m->g->strip[es]); 257*55845Sbostic break; 258*55845Sbostic case OCH_: 259*55845Sbostic while (OP(m->g->strip[es]) != O_CH) 260*55845Sbostic es += OPND(m->g->strip[es]); 261*55845Sbostic break; 262*55845Sbostic } 263*55845Sbostic es++; 264*55845Sbostic 265*55845Sbostic /* figure out what it matched */ 266*55845Sbostic switch (OP(m->g->strip[ss])) { 267*55845Sbostic case OEND: 268*55845Sbostic assert(0); 269*55845Sbostic break; 270*55845Sbostic case OCHAR: 271*55845Sbostic sp++; 272*55845Sbostic break; 273*55845Sbostic case OBOL: 274*55845Sbostic case OEOL: 275*55845Sbostic break; 276*55845Sbostic case OANY: 277*55845Sbostic case OANYOF: 278*55845Sbostic sp++; 279*55845Sbostic break; 280*55845Sbostic case OBACK_: 281*55845Sbostic case O_BACK: 282*55845Sbostic assert(0); 283*55845Sbostic break; 284*55845Sbostic /* cases where length of match is hard to find */ 285*55845Sbostic case OQUEST_: 286*55845Sbostic stp = stop; 287*55845Sbostic for (;;) { 288*55845Sbostic /* how long could this one be? */ 289*55845Sbostic rest = slow(m, sp, stp, ss, es); 290*55845Sbostic assert(rest != NULL); /* it did match */ 291*55845Sbostic /* could the rest match the rest? */ 292*55845Sbostic tail = slow(m, rest, stop, es, stopst); 293*55845Sbostic if (tail == stop) 294*55845Sbostic break; /* yes! */ 295*55845Sbostic /* no -- try a shorter match for this one */ 296*55845Sbostic stp = rest - 1; 297*55845Sbostic assert(stp >= sp); /* it did work */ 298*55845Sbostic } 299*55845Sbostic ssub = ss + 1; 300*55845Sbostic esub = es - 1; 301*55845Sbostic /* did innards match? */ 302*55845Sbostic if (slow(m, sp, rest, ssub, esub) != NULL) { 303*55845Sbostic dp = dissect(m, sp, rest, ssub, esub); 304*55845Sbostic assert(dp == rest); 305*55845Sbostic } else /* no */ 306*55845Sbostic assert(sp == rest); 307*55845Sbostic sp = rest; 308*55845Sbostic break; 309*55845Sbostic case OPLUS_: 310*55845Sbostic stp = stop; 311*55845Sbostic for (;;) { 312*55845Sbostic /* how long could this one be? */ 313*55845Sbostic rest = slow(m, sp, stp, ss, es); 314*55845Sbostic assert(rest != NULL); /* it did match */ 315*55845Sbostic /* could the rest match the rest? */ 316*55845Sbostic tail = slow(m, rest, stop, es, stopst); 317*55845Sbostic if (tail == stop) 318*55845Sbostic break; /* yes! */ 319*55845Sbostic /* no -- try a shorter match for this one */ 320*55845Sbostic stp = rest - 1; 321*55845Sbostic assert(stp >= sp); /* it did work */ 322*55845Sbostic } 323*55845Sbostic ssub = ss + 1; 324*55845Sbostic esub = es - 1; 325*55845Sbostic ssp = sp; 326*55845Sbostic oldssp = ssp; 327*55845Sbostic for (;;) { /* find last match of innards */ 328*55845Sbostic sep = slow(m, ssp, rest, ssub, esub); 329*55845Sbostic if (sep == NULL || sep == ssp) 330*55845Sbostic break; /* failed or matched null */ 331*55845Sbostic oldssp = ssp; /* on to next try */ 332*55845Sbostic ssp = sep; 333*55845Sbostic } 334*55845Sbostic if (sep == NULL) { 335*55845Sbostic /* last successful match */ 336*55845Sbostic sep = ssp; 337*55845Sbostic ssp = oldssp; 338*55845Sbostic } 339*55845Sbostic assert(sep == rest); /* must exhaust substring */ 340*55845Sbostic assert(slow(m, ssp, sep, ssub, esub) == rest); 341*55845Sbostic dp = dissect(m, ssp, sep, ssub, esub); 342*55845Sbostic assert(dp == sep); 343*55845Sbostic sp = rest; 344*55845Sbostic break; 345*55845Sbostic case OCH_: 346*55845Sbostic stp = stop; 347*55845Sbostic for (;;) { 348*55845Sbostic /* how long could this one be? */ 349*55845Sbostic rest = slow(m, sp, stp, ss, es); 350*55845Sbostic assert(rest != NULL); /* it did match */ 351*55845Sbostic /* could the rest match the rest? */ 352*55845Sbostic tail = slow(m, rest, stop, es, stopst); 353*55845Sbostic if (tail == stop) 354*55845Sbostic break; /* yes! */ 355*55845Sbostic /* no -- try a shorter match for this one */ 356*55845Sbostic stp = rest - 1; 357*55845Sbostic assert(stp >= sp); /* it did work */ 358*55845Sbostic } 359*55845Sbostic ssub = ss + 1; 360*55845Sbostic esub = ss + OPND(m->g->strip[ss]) - 1; 361*55845Sbostic assert(OP(m->g->strip[esub]) == OOR1); 362*55845Sbostic for (;;) { /* find first matching branch */ 363*55845Sbostic if (slow(m, sp, rest, ssub, esub) == rest) 364*55845Sbostic break; /* it matched all of it */ 365*55845Sbostic /* that one missed, try next one */ 366*55845Sbostic assert(OP(m->g->strip[esub]) == OOR1); 367*55845Sbostic esub++; 368*55845Sbostic assert(OP(m->g->strip[esub]) == OOR2); 369*55845Sbostic ssub = esub + 1; 370*55845Sbostic esub += OPND(m->g->strip[esub]); 371*55845Sbostic if (OP(m->g->strip[esub]) == OOR2) 372*55845Sbostic esub--; 373*55845Sbostic else 374*55845Sbostic assert(OP(m->g->strip[esub]) == O_CH); 375*55845Sbostic } 376*55845Sbostic dp = dissect(m, sp, rest, ssub, esub); 377*55845Sbostic assert(dp == rest); 378*55845Sbostic sp = rest; 379*55845Sbostic break; 380*55845Sbostic case O_PLUS: 381*55845Sbostic case O_QUEST: 382*55845Sbostic case OOR1: 383*55845Sbostic case OOR2: 384*55845Sbostic case O_CH: 385*55845Sbostic assert(0); 386*55845Sbostic break; 387*55845Sbostic case OLPAREN: 388*55845Sbostic i = OPND(m->g->strip[ss]); 389*55845Sbostic m->pmatch[i].rm_so = sp - m->offp; 390*55845Sbostic break; 391*55845Sbostic case ORPAREN: 392*55845Sbostic i = OPND(m->g->strip[ss]); 393*55845Sbostic m->pmatch[i].rm_eo = sp - m->offp; 394*55845Sbostic break; 395*55845Sbostic default: /* uh oh */ 396*55845Sbostic assert(0); 397*55845Sbostic break; 398*55845Sbostic } 399*55845Sbostic } 400*55845Sbostic 401*55845Sbostic assert(sp == stop); 402*55845Sbostic return(sp); 403*55845Sbostic } 404*55845Sbostic 405*55845Sbostic /* 406*55845Sbostic - backref - figure out what matched what, figuring in back references 407*55845Sbostic */ 408*55845Sbostic static uchar * /* == stop (success) or NULL (failure) */ 409*55845Sbostic backref(m, start, stop, startst, stopst, lev) 410*55845Sbostic register struct match *m; 411*55845Sbostic uchar *start; 412*55845Sbostic uchar *stop; 413*55845Sbostic sopno startst; 414*55845Sbostic sopno stopst; 415*55845Sbostic sopno lev; /* PLUS nesting level */ 416*55845Sbostic { 417*55845Sbostic register int i; 418*55845Sbostic register sopno ss; /* start sop of current subRE */ 419*55845Sbostic register uchar *sp; /* start of string matched by it */ 420*55845Sbostic register sopno ssub; /* start sop of subsubRE */ 421*55845Sbostic register sopno esub; /* end sop of subsubRE */ 422*55845Sbostic register uchar *ssp; /* start of string matched by subsubRE */ 423*55845Sbostic register uchar *dp; 424*55845Sbostic register size_t len; 425*55845Sbostic register int hard; 426*55845Sbostic register sop s; 427*55845Sbostic register regoff_t offsave; 428*55845Sbostic register cset *cs; 429*55845Sbostic 430*55845Sbostic DO("back", start, stop, startst, stopst); 431*55845Sbostic sp = start; 432*55845Sbostic 433*55845Sbostic /* get as far as we can with easy stuff */ 434*55845Sbostic hard = 0; 435*55845Sbostic for (ss = startst; !hard && ss < stopst; ss++) 436*55845Sbostic switch (OP(s = m->g->strip[ss])) { 437*55845Sbostic case OCHAR: 438*55845Sbostic if (sp == stop || *sp++ != OPND(s)) 439*55845Sbostic return(NULL); 440*55845Sbostic break; 441*55845Sbostic case OANY: 442*55845Sbostic if (sp == stop) 443*55845Sbostic return(NULL); 444*55845Sbostic sp++; 445*55845Sbostic break; 446*55845Sbostic case OANYOF: 447*55845Sbostic cs = &m->g->sets[OPND(s)]; 448*55845Sbostic if (sp == stop || !CHIN(cs, *sp++)) 449*55845Sbostic return(NULL); 450*55845Sbostic break; 451*55845Sbostic case OBOL: 452*55845Sbostic if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 453*55845Sbostic (sp < m->endp && *(sp-1) == '\n' && 454*55845Sbostic (m->g->cflags®_NEWLINE)) ) 455*55845Sbostic { /* yes */ } 456*55845Sbostic else 457*55845Sbostic return(NULL); 458*55845Sbostic break; 459*55845Sbostic case OEOL: 460*55845Sbostic if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 461*55845Sbostic (sp < m->endp && *sp == '\n' && 462*55845Sbostic (m->g->cflags®_NEWLINE)) ) 463*55845Sbostic { /* yes */ } 464*55845Sbostic else 465*55845Sbostic return(NULL); 466*55845Sbostic break; 467*55845Sbostic case O_QUEST: 468*55845Sbostic break; 469*55845Sbostic case OOR1: /* matches null but needs to skip */ 470*55845Sbostic ss++; 471*55845Sbostic s = m->g->strip[ss]; 472*55845Sbostic do { 473*55845Sbostic assert(OP(s) == OOR2); 474*55845Sbostic ss += OPND(s); 475*55845Sbostic } while (OP(s = m->g->strip[ss]) != O_CH); 476*55845Sbostic /* note that the ss++ gets us past the O_CH */ 477*55845Sbostic break; 478*55845Sbostic default: /* have to make a choice */ 479*55845Sbostic hard = 1; 480*55845Sbostic break; 481*55845Sbostic } 482*55845Sbostic if (!hard) { /* that was it! */ 483*55845Sbostic if (sp != stop) 484*55845Sbostic return(NULL); 485*55845Sbostic return(sp); 486*55845Sbostic } 487*55845Sbostic ss--; /* adjust for the for's final increment */ 488*55845Sbostic 489*55845Sbostic /* the hard stuff */ 490*55845Sbostic DO("hard", sp, stop, ss, stopst); 491*55845Sbostic s = m->g->strip[ss]; 492*55845Sbostic switch (OP(s)) { 493*55845Sbostic case OBACK_: /* the vilest depths */ 494*55845Sbostic i = OPND(s); 495*55845Sbostic if (m->pmatch[i].rm_eo == -1) 496*55845Sbostic return(NULL); 497*55845Sbostic assert(m->pmatch[i].rm_so != -1); 498*55845Sbostic len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 499*55845Sbostic assert(stop - m->beginp >= len); 500*55845Sbostic if (sp > stop - len) 501*55845Sbostic return(NULL); /* not enough left to match */ 502*55845Sbostic ssp = m->offp + m->pmatch[i].rm_so; 503*55845Sbostic if (strncmp((char *)sp, (char *)ssp, len) != 0) 504*55845Sbostic return(NULL); 505*55845Sbostic while (m->g->strip[ss] != SOP(O_BACK, i)) 506*55845Sbostic ss++; 507*55845Sbostic return(backref(m, sp+len, stop, ss+1, stopst, lev)); 508*55845Sbostic break; 509*55845Sbostic case OQUEST_: /* to null or not */ 510*55845Sbostic dp = backref(m, sp, stop, ss+1, stopst, lev); 511*55845Sbostic if (dp != NULL) 512*55845Sbostic return(dp); /* not */ 513*55845Sbostic return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 514*55845Sbostic break; 515*55845Sbostic case OPLUS_: 516*55845Sbostic assert(m->lastpos != NULL); 517*55845Sbostic assert(lev+1 <= m->g->nplus); 518*55845Sbostic m->lastpos[lev+1] = sp; 519*55845Sbostic return(backref(m, sp, stop, ss+1, stopst, lev+1)); 520*55845Sbostic break; 521*55845Sbostic case O_PLUS: 522*55845Sbostic if (sp == m->lastpos[lev]) /* last pass matched null */ 523*55845Sbostic return(backref(m, sp, stop, ss+1, stopst, lev-1)); 524*55845Sbostic /* try another pass */ 525*55845Sbostic m->lastpos[lev] = sp; 526*55845Sbostic dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 527*55845Sbostic if (dp == NULL) 528*55845Sbostic return(backref(m, sp, stop, ss+1, stopst, lev-1)); 529*55845Sbostic else 530*55845Sbostic return(dp); 531*55845Sbostic break; 532*55845Sbostic case OCH_: /* find the right one, if any */ 533*55845Sbostic ssub = ss + 1; 534*55845Sbostic esub = ss + OPND(s) - 1; 535*55845Sbostic assert(OP(m->g->strip[esub]) == OOR1); 536*55845Sbostic for (;;) { /* find first matching branch */ 537*55845Sbostic dp = backref(m, sp, stop, ssub, esub, lev); 538*55845Sbostic if (dp != NULL) 539*55845Sbostic return(dp); 540*55845Sbostic /* that one missed, try next one */ 541*55845Sbostic if (OP(m->g->strip[esub]) == O_CH) 542*55845Sbostic return(NULL); /* there is none */ 543*55845Sbostic esub++; 544*55845Sbostic assert(OP(m->g->strip[esub]) == OOR2); 545*55845Sbostic ssub = esub + 1; 546*55845Sbostic esub += OPND(m->g->strip[esub]); 547*55845Sbostic if (OP(m->g->strip[esub]) == OOR2) 548*55845Sbostic esub--; 549*55845Sbostic else 550*55845Sbostic assert(OP(m->g->strip[esub]) == O_CH); 551*55845Sbostic } 552*55845Sbostic break; 553*55845Sbostic case OLPAREN: /* must undo assignment if rest fails */ 554*55845Sbostic i = OPND(s); 555*55845Sbostic offsave = m->pmatch[i].rm_so; 556*55845Sbostic m->pmatch[i].rm_so = sp - m->offp; 557*55845Sbostic dp = backref(m, sp, stop, ss+1, stopst, lev); 558*55845Sbostic if (dp != NULL) 559*55845Sbostic return(dp); 560*55845Sbostic m->pmatch[i].rm_so = offsave; 561*55845Sbostic return(NULL); 562*55845Sbostic break; 563*55845Sbostic case ORPAREN: /* must undo assignment if rest fails */ 564*55845Sbostic i = OPND(s); 565*55845Sbostic offsave = m->pmatch[i].rm_eo; 566*55845Sbostic m->pmatch[i].rm_eo = sp - m->offp; 567*55845Sbostic dp = backref(m, sp, stop, ss+1, stopst, lev); 568*55845Sbostic if (dp != NULL) 569*55845Sbostic return(dp); 570*55845Sbostic m->pmatch[i].rm_eo = offsave; 571*55845Sbostic return(NULL); 572*55845Sbostic break; 573*55845Sbostic default: /* uh oh */ 574*55845Sbostic assert(0); 575*55845Sbostic break; 576*55845Sbostic } 577*55845Sbostic 578*55845Sbostic /* "can't happen" */ 579*55845Sbostic assert(0); 580*55845Sbostic /* NOTREACHED */ 581*55845Sbostic } 582*55845Sbostic 583*55845Sbostic /* 584*55845Sbostic - fast - step through the string at top speed 585*55845Sbostic */ 586*55845Sbostic static uchar * /* where tentative match ended, or NULL */ 587*55845Sbostic fast(m, start, stop, startst, stopst) 588*55845Sbostic register struct match *m; 589*55845Sbostic uchar *start; 590*55845Sbostic uchar *stop; 591*55845Sbostic sopno startst; 592*55845Sbostic sopno stopst; 593*55845Sbostic { 594*55845Sbostic register states st = m->st; 595*55845Sbostic register states fresh = m->fresh; 596*55845Sbostic register states tmp = m->tmp; 597*55845Sbostic register uchar *p = start; 598*55845Sbostic register uchar c; 599*55845Sbostic register uchar lastc; /* previous c */ 600*55845Sbostic register int atbol; 601*55845Sbostic register int ateol; 602*55845Sbostic register uchar *coldp; /* last p after which no match was underway */ 603*55845Sbostic 604*55845Sbostic CLEAR(st); 605*55845Sbostic SET1(st, startst); 606*55845Sbostic st = expand(m->g, startst, stopst, st, 0, 0); 607*55845Sbostic ASSIGN(fresh, st); 608*55845Sbostic SP("start", st, *p); 609*55845Sbostic c = '\0'; 610*55845Sbostic coldp = NULL; 611*55845Sbostic for (;;) { 612*55845Sbostic /* next character */ 613*55845Sbostic lastc = c; 614*55845Sbostic c = (p == m->endp) ? '\0' : *p; 615*55845Sbostic if (EQ(st, fresh)) 616*55845Sbostic coldp = p; 617*55845Sbostic 618*55845Sbostic /* is there an EOL and/or BOL between lastc and c? */ 619*55845Sbostic atbol = ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 620*55845Sbostic (lastc == '\0' && !(m->eflags®_NOTBOL)) ); 621*55845Sbostic ateol = ( (c == '\n' && m->g->cflags®_NEWLINE) || 622*55845Sbostic (c == '\0' && !(m->eflags®_NOTEOL)) ); 623*55845Sbostic if (atbol || ateol) { 624*55845Sbostic st = expand(m->g, startst, stopst, st, atbol, ateol); 625*55845Sbostic SP("bef", st, c); 626*55845Sbostic } 627*55845Sbostic 628*55845Sbostic /* are we done? */ 629*55845Sbostic if (ISSET(st, stopst) || p == stop) 630*55845Sbostic break; /* NOTE BREAK OUT */ 631*55845Sbostic 632*55845Sbostic /* no, we must deal with this character */ 633*55845Sbostic ASSIGN(tmp, st); 634*55845Sbostic ASSIGN(st, fresh); 635*55845Sbostic st = step(m->g, startst, stopst, tmp, c, st); 636*55845Sbostic SP("aft", st, c); 637*55845Sbostic assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st)); 638*55845Sbostic p++; 639*55845Sbostic } 640*55845Sbostic 641*55845Sbostic assert(coldp != NULL); 642*55845Sbostic m->coldp = coldp; 643*55845Sbostic if (ISSET(st, stopst)) 644*55845Sbostic return(p+1); 645*55845Sbostic else 646*55845Sbostic return(NULL); 647*55845Sbostic } 648*55845Sbostic 649*55845Sbostic /* 650*55845Sbostic - slow - step through the string more deliberately 651*55845Sbostic */ 652*55845Sbostic static uchar * /* where it ended */ 653*55845Sbostic slow(m, start, stop, startst, stopst) 654*55845Sbostic register struct match *m; 655*55845Sbostic uchar *start; 656*55845Sbostic uchar *stop; 657*55845Sbostic sopno startst; 658*55845Sbostic sopno stopst; 659*55845Sbostic { 660*55845Sbostic register states st = m->st; 661*55845Sbostic register states empty = m->empty; 662*55845Sbostic register states tmp = m->tmp; 663*55845Sbostic register uchar *p = start; 664*55845Sbostic register uchar c = (start == m->beginp) ? '\0' : *(start-1); 665*55845Sbostic register uchar lastc; /* previous c */ 666*55845Sbostic register int atbol; 667*55845Sbostic register int ateol; 668*55845Sbostic register uchar *matchp; /* last p at which a match ended */ 669*55845Sbostic 670*55845Sbostic DO("slow", start, stop, startst, stopst); 671*55845Sbostic CLEAR(st); 672*55845Sbostic SET1(st, startst); 673*55845Sbostic SP("sstart", st, *p); 674*55845Sbostic matchp = NULL; 675*55845Sbostic for (;;) { 676*55845Sbostic /* next character */ 677*55845Sbostic lastc = c; 678*55845Sbostic c = (p == m->endp) ? '\0' : *p; 679*55845Sbostic 680*55845Sbostic /* is there an EOL and/or BOL between lastc and c? */ 681*55845Sbostic atbol = ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 682*55845Sbostic (lastc == '\0' && !(m->eflags®_NOTBOL)) ); 683*55845Sbostic ateol = ( (c == '\n' && m->g->cflags®_NEWLINE) || 684*55845Sbostic (c == '\0' && !(m->eflags®_NOTEOL)) ); 685*55845Sbostic 686*55845Sbostic /* do we need an expansion before looking at the char? */ 687*55845Sbostic if (p == start || atbol || ateol) { 688*55845Sbostic st = expand(m->g, startst, stopst, st, atbol, ateol); 689*55845Sbostic SP("sbef", st, c); 690*55845Sbostic } 691*55845Sbostic 692*55845Sbostic /* are we done? */ 693*55845Sbostic if (ISSET(st, stopst)) 694*55845Sbostic matchp = p; 695*55845Sbostic if (EQ(st, empty) || p == stop) 696*55845Sbostic break; /* NOTE BREAK OUT */ 697*55845Sbostic 698*55845Sbostic /* no, we must deal with this character */ 699*55845Sbostic ASSIGN(tmp, st); 700*55845Sbostic ASSIGN(st, empty); 701*55845Sbostic st = step(m->g, startst, stopst, tmp, c, st); 702*55845Sbostic SP("saft", st, c); 703*55845Sbostic assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st)); 704*55845Sbostic p++; 705*55845Sbostic } 706*55845Sbostic 707*55845Sbostic return(matchp); 708*55845Sbostic } 709*55845Sbostic 710*55845Sbostic 711*55845Sbostic /* 712*55845Sbostic - expand - return set of states reachable from an initial set 713*55845Sbostic */ 714*55845Sbostic static states 715*55845Sbostic expand(g, start, stop, st, atbol, ateol) 716*55845Sbostic register struct re_guts *g; 717*55845Sbostic int start; /* start state within strip */ 718*55845Sbostic int stop; /* state after stop state within strip */ 719*55845Sbostic register states st; 720*55845Sbostic int atbol; /* at start or just after \n? (for BOL) */ 721*55845Sbostic int ateol; /* just before \n or \0? (for EOL) */ 722*55845Sbostic { 723*55845Sbostic register sop s; 724*55845Sbostic register sopno pc; 725*55845Sbostic register onestate here; /* note, macros know this name */ 726*55845Sbostic register sopno look; 727*55845Sbostic register int i; 728*55845Sbostic 729*55845Sbostic for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 730*55845Sbostic s = g->strip[pc]; 731*55845Sbostic switch (OP(s)) { 732*55845Sbostic case OEND: 733*55845Sbostic assert(pc == stop-1); 734*55845Sbostic break; 735*55845Sbostic case OCHAR: /* can't get past this */ 736*55845Sbostic break; 737*55845Sbostic case OBOL: 738*55845Sbostic if (atbol) 739*55845Sbostic FWD(st, st, 1); 740*55845Sbostic break; 741*55845Sbostic case OEOL: 742*55845Sbostic if (ateol) 743*55845Sbostic FWD(st, st, 1); 744*55845Sbostic break; 745*55845Sbostic case OANY: /* can't get past this either */ 746*55845Sbostic break; 747*55845Sbostic case OANYOF: /* or this */ 748*55845Sbostic break; 749*55845Sbostic case OBACK_: /* not significant here */ 750*55845Sbostic case O_BACK: 751*55845Sbostic FWD(st, st, 1); 752*55845Sbostic break; 753*55845Sbostic case OPLUS_: /* forward, this is just an empty */ 754*55845Sbostic FWD(st, st, 1); 755*55845Sbostic break; 756*55845Sbostic case O_PLUS: /* both forward and back */ 757*55845Sbostic FWD(st, st, 1); 758*55845Sbostic i = ISSETBACK(st, OPND(s)); 759*55845Sbostic BACK(st, st, OPND(s)); 760*55845Sbostic if (!i && ISSETBACK(st, OPND(s))) { 761*55845Sbostic /* oho, must reconsider loop body */ 762*55845Sbostic pc -= OPND(s) + 1; 763*55845Sbostic INIT(here, pc); 764*55845Sbostic } 765*55845Sbostic break; 766*55845Sbostic case OQUEST_: /* two branches, both forward */ 767*55845Sbostic FWD(st, st, 1); 768*55845Sbostic FWD(st, st, OPND(s)); 769*55845Sbostic break; 770*55845Sbostic case O_QUEST: /* just an empty */ 771*55845Sbostic FWD(st, st, 1); 772*55845Sbostic break; 773*55845Sbostic case OLPAREN: /* not significant here */ 774*55845Sbostic case ORPAREN: 775*55845Sbostic FWD(st, st, 1); 776*55845Sbostic break; 777*55845Sbostic case OCH_: /* mark the first two branches */ 778*55845Sbostic FWD(st, st, 1); 779*55845Sbostic assert(OP(g->strip[pc+OPND(s)]) == OOR2); 780*55845Sbostic FWD(st, st, OPND(s)); 781*55845Sbostic break; 782*55845Sbostic case OOR1: /* done a branch, find the O_CH */ 783*55845Sbostic if (ISSTATEIN(st, here)) { 784*55845Sbostic for (look = 1; 785*55845Sbostic OP(s = g->strip[pc+look]) != O_CH; 786*55845Sbostic look += OPND(s)) 787*55845Sbostic assert(OP(s) == OOR2); 788*55845Sbostic FWD(st, st, look); 789*55845Sbostic } 790*55845Sbostic break; 791*55845Sbostic case OOR2: /* propagate OCH_'s marking onward */ 792*55845Sbostic FWD(st, st, 1); 793*55845Sbostic if (OP(g->strip[pc+OPND(s)]) != O_CH) { 794*55845Sbostic assert(OP(g->strip[pc+OPND(s)]) == OOR2); 795*55845Sbostic FWD(st, st, OPND(s)); 796*55845Sbostic } 797*55845Sbostic break; 798*55845Sbostic case O_CH: /* just an empty */ 799*55845Sbostic FWD(st, st, 1); 800*55845Sbostic break; 801*55845Sbostic default: /* ooooops... */ 802*55845Sbostic assert(0); 803*55845Sbostic break; 804*55845Sbostic } 805*55845Sbostic } 806*55845Sbostic 807*55845Sbostic return(st); 808*55845Sbostic } 809*55845Sbostic 810*55845Sbostic /* 811*55845Sbostic - step - map set of states reachable before char to set reachable after 812*55845Sbostic */ 813*55845Sbostic static states 814*55845Sbostic step(g, start, stop, bef, ch, aft) 815*55845Sbostic register struct re_guts *g; 816*55845Sbostic int start; /* start state within strip */ 817*55845Sbostic int stop; /* state after stop state within strip */ 818*55845Sbostic register states bef; /* states reachable before */ 819*55845Sbostic uchar ch; 820*55845Sbostic register states aft; /* states already known reachable after */ 821*55845Sbostic { 822*55845Sbostic register cset *cs; 823*55845Sbostic register sop s; 824*55845Sbostic register sopno pc; 825*55845Sbostic register onestate here; /* note, macros know this name */ 826*55845Sbostic register sopno look; 827*55845Sbostic register int i; 828*55845Sbostic 829*55845Sbostic for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 830*55845Sbostic s = g->strip[pc]; 831*55845Sbostic switch (OP(s)) { 832*55845Sbostic case OEND: 833*55845Sbostic assert(pc == stop-1); 834*55845Sbostic break; 835*55845Sbostic case OCHAR: 836*55845Sbostic if (ch == OPND(s)) 837*55845Sbostic FWD(aft, bef, 1); 838*55845Sbostic break; 839*55845Sbostic case OBOL: /* these are looked after by expand() */ 840*55845Sbostic case OEOL: 841*55845Sbostic break; 842*55845Sbostic case OANY: 843*55845Sbostic FWD(aft, bef, 1); 844*55845Sbostic break; 845*55845Sbostic case OANYOF: 846*55845Sbostic cs = &g->sets[OPND(s)]; 847*55845Sbostic if (CHIN(cs, ch)) 848*55845Sbostic FWD(aft, bef, 1); 849*55845Sbostic break; 850*55845Sbostic case OBACK_: /* ignored here */ 851*55845Sbostic case O_BACK: 852*55845Sbostic FWD(aft, aft, 1); 853*55845Sbostic break; 854*55845Sbostic case OPLUS_: /* forward, this is just an empty */ 855*55845Sbostic FWD(aft, aft, 1); 856*55845Sbostic break; 857*55845Sbostic case O_PLUS: /* both forward and back */ 858*55845Sbostic FWD(aft, aft, 1); 859*55845Sbostic i = ISSETBACK(aft, OPND(s)); 860*55845Sbostic BACK(aft, aft, OPND(s)); 861*55845Sbostic if (!i && ISSETBACK(aft, OPND(s))) { 862*55845Sbostic /* oho, must reconsider loop body */ 863*55845Sbostic pc -= OPND(s) + 1; 864*55845Sbostic INIT(here, pc); 865*55845Sbostic } 866*55845Sbostic break; 867*55845Sbostic case OQUEST_: /* two branches, both forward */ 868*55845Sbostic FWD(aft, aft, 1); 869*55845Sbostic FWD(aft, aft, OPND(s)); 870*55845Sbostic break; 871*55845Sbostic case O_QUEST: /* just an empty */ 872*55845Sbostic FWD(aft, aft, 1); 873*55845Sbostic break; 874*55845Sbostic case OLPAREN: /* not significant here */ 875*55845Sbostic case ORPAREN: 876*55845Sbostic FWD(aft, aft, 1); 877*55845Sbostic break; 878*55845Sbostic case OCH_: /* mark the first two branches */ 879*55845Sbostic FWD(aft, aft, 1); 880*55845Sbostic assert(OP(g->strip[pc+OPND(s)]) == OOR2); 881*55845Sbostic FWD(aft, aft, OPND(s)); 882*55845Sbostic break; 883*55845Sbostic case OOR1: /* done a branch, find the O_CH */ 884*55845Sbostic if (ISSTATEIN(aft, here)) { 885*55845Sbostic for (look = 1; 886*55845Sbostic OP(s = g->strip[pc+look]) != O_CH; 887*55845Sbostic look += OPND(s)) 888*55845Sbostic assert(OP(s) == OOR2); 889*55845Sbostic FWD(aft, aft, look); 890*55845Sbostic } 891*55845Sbostic break; 892*55845Sbostic case OOR2: /* propagate OCH_'s marking */ 893*55845Sbostic FWD(aft, aft, 1); 894*55845Sbostic if (OP(g->strip[pc+OPND(s)]) != O_CH) { 895*55845Sbostic assert(OP(g->strip[pc+OPND(s)]) == OOR2); 896*55845Sbostic FWD(aft, aft, OPND(s)); 897*55845Sbostic } 898*55845Sbostic break; 899*55845Sbostic case O_CH: /* just empty */ 900*55845Sbostic FWD(aft, aft, 1); 901*55845Sbostic break; 902*55845Sbostic default: /* ooooops... */ 903*55845Sbostic assert(0); 904*55845Sbostic break; 905*55845Sbostic } 906*55845Sbostic } 907*55845Sbostic 908*55845Sbostic return(aft); 909*55845Sbostic } 910*55845Sbostic 911*55845Sbostic #ifndef NDEBUG 912*55845Sbostic /* 913*55845Sbostic - print - print a set of states 914*55845Sbostic */ 915*55845Sbostic static void 916*55845Sbostic print(g, caption, st, ch, d) 917*55845Sbostic struct re_guts *g; 918*55845Sbostic char *caption; 919*55845Sbostic states st; 920*55845Sbostic uchar ch; 921*55845Sbostic FILE *d; 922*55845Sbostic { 923*55845Sbostic register int i; 924*55845Sbostic register int first = 1; 925*55845Sbostic 926*55845Sbostic fprintf(d, "%s", caption); 927*55845Sbostic if (ch != '\0') 928*55845Sbostic fprintf(d, " %s", regchar(ch)); 929*55845Sbostic for (i = 0; i < g->nstates; i++) 930*55845Sbostic if (ISSET(st, i)) { 931*55845Sbostic fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 932*55845Sbostic first = 0; 933*55845Sbostic } 934*55845Sbostic fprintf(d, "\n"); 935*55845Sbostic } 936*55845Sbostic #endif 937*55845Sbostic 938*55845Sbostic #undef matcher 939*55845Sbostic #undef fast 940*55845Sbostic #undef slow 941*55845Sbostic #undef dissect 942*55845Sbostic #undef backref 943*55845Sbostic #undef expand 944*55845Sbostic #undef step 945*55845Sbostic #undef print 946*55845Sbostic #undef match 947