1 /* 2 * The matching engine and friends. This file is #included by regexec.c 3 * after suitable #defines of a variety of macros used herein, so that 4 * different state representations can be used without duplicating masses 5 * of code. 6 */ 7 8 #ifdef SNAMES 9 #define matcher smatcher 10 #define fast sfast 11 #define slow sslow 12 #define dissect sdissect 13 #define backref sbackref 14 #define step sstep 15 #define print sprint 16 #define at sat 17 #define match smat 18 #endif 19 #ifdef LNAMES 20 #define matcher lmatcher 21 #define fast lfast 22 #define slow lslow 23 #define dissect ldissect 24 #define backref lbackref 25 #define step lstep 26 #define print lprint 27 #define at lat 28 #define match lmat 29 #endif 30 31 /* another structure passed up and down to avoid zillions of parameters */ 32 struct match { 33 struct re_guts *g; 34 int eflags; 35 regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 36 char *offp; /* offsets work from here */ 37 char *beginp; /* start of string -- virtual NUL precedes */ 38 char *endp; /* end of string -- virtual NUL here */ 39 char *coldp; /* can be no match starting before here */ 40 char **lastpos; /* [nplus+1] */ 41 STATEVARS; 42 states st; /* current states */ 43 states fresh; /* states for a fresh start */ 44 states tmp; /* temporary */ 45 states empty; /* empty set of states */ 46 }; 47 48 #include "engine.ih" 49 50 #ifdef REDEBUG 51 #define SP(t, s, c) print(m, t, s, c, stdout) 52 #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 53 #define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 54 #else 55 #define SP(t, s, c) /* nothing */ 56 #define AT(t, p1, p2, s1, s2) /* nothing */ 57 #define NOTE(s) /* nothing */ 58 #endif 59 60 /* 61 - matcher - the actual matching engine 62 == static int matcher(register struct re_guts *g, char *string, \ 63 == size_t nmatch, regmatch_t pmatch[], int eflags); 64 */ 65 static int /* 0 success, REG_NOMATCH failure */ 66 matcher(g, string, nmatch, pmatch, eflags) 67 register struct re_guts *g; 68 char *string; 69 size_t nmatch; 70 regmatch_t pmatch[]; 71 int eflags; 72 { 73 register char *endp; 74 register int i; 75 struct match mv; 76 register struct match *m = &mv; 77 register char *dp; 78 const register sopno gf = g->firststate+1; /* +1 for OEND */ 79 const register sopno gl = g->laststate; 80 char *start; 81 char *stop; 82 83 /* simplify the situation where possible */ 84 if (g->cflags®_NOSUB) 85 nmatch = 0; 86 if (eflags®_STARTEND) { 87 start = string + pmatch[0].rm_so; 88 stop = string + pmatch[0].rm_eo; 89 } else { 90 start = string; 91 stop = start + strlen(start); 92 } 93 if (stop < start) 94 return(REG_INVARG); 95 96 /* prescreening; this does wonders for this rather slow code */ 97 if (g->must != NULL) { 98 for (dp = start; dp < stop; dp++) 99 if (*dp == g->must[0] && stop - dp >= g->mlen && 100 memcmp(dp, g->must, (size_t)g->mlen) == 0) 101 break; 102 if (dp == stop) /* we didn't find g->must */ 103 return(REG_NOMATCH); 104 } 105 106 /* match struct setup */ 107 m->g = g; 108 m->eflags = eflags; 109 m->pmatch = NULL; 110 m->lastpos = NULL; 111 m->offp = string; 112 m->beginp = start; 113 m->endp = stop; 114 STATESETUP(m, 4); 115 SETUP(m->st); 116 SETUP(m->fresh); 117 SETUP(m->tmp); 118 SETUP(m->empty); 119 CLEAR(m->empty); 120 121 /* this loop does only one repetition except for backrefs */ 122 for (;;) { 123 endp = fast(m, start, stop, gf, gl); 124 if (endp == NULL) { /* a miss */ 125 STATETEARDOWN(m); 126 return(REG_NOMATCH); 127 } 128 if (nmatch == 0 && !g->backrefs) 129 break; /* no further info needed */ 130 131 /* where? */ 132 assert(m->coldp != NULL); 133 for (;;) { 134 NOTE("finding start"); 135 endp = slow(m, m->coldp, stop, gf, gl); 136 if (endp != NULL) 137 break; 138 assert(m->coldp < m->endp); 139 m->coldp++; 140 } 141 if (nmatch == 1 && !g->backrefs) 142 break; /* no further info needed */ 143 144 /* oh my, he wants the subexpressions... */ 145 if (m->pmatch == NULL) 146 m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 147 sizeof(regmatch_t)); 148 if (m->pmatch == NULL) { 149 STATETEARDOWN(m); 150 return(REG_ESPACE); 151 } 152 for (i = 1; i <= m->g->nsub; i++) 153 m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 154 if (!g->backrefs && !(m->eflags®_BACKR)) { 155 NOTE("dissecting"); 156 dp = dissect(m, m->coldp, endp, gf, gl); 157 } else { 158 if (g->nplus > 0 && m->lastpos == NULL) 159 m->lastpos = (char **)malloc((g->nplus+1) * 160 sizeof(char *)); 161 if (g->nplus > 0 && m->lastpos == NULL) { 162 free(m->pmatch); 163 STATETEARDOWN(m); 164 return(REG_ESPACE); 165 } 166 NOTE("backref dissect"); 167 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 168 } 169 if (dp != NULL) 170 break; 171 172 /* uh-oh... we couldn't find a subexpression-level match */ 173 assert(g->backrefs); /* must be back references doing it */ 174 assert(g->nplus == 0 || m->lastpos != NULL); 175 for (;;) { 176 if (dp != NULL || endp <= m->coldp) 177 break; /* defeat */ 178 NOTE("backoff"); 179 endp = slow(m, m->coldp, endp-1, gf, gl); 180 if (endp == NULL) 181 break; /* defeat */ 182 /* try it on a shorter possibility */ 183 #ifndef NDEBUG 184 for (i = 1; i <= m->g->nsub; i++) { 185 assert(m->pmatch[i].rm_so == -1); 186 assert(m->pmatch[i].rm_eo == -1); 187 } 188 #endif 189 NOTE("backoff dissect"); 190 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 191 } 192 assert(dp == NULL || dp == endp); 193 if (dp != NULL) /* found a shorter one */ 194 break; 195 196 /* despite initial appearances, there is no match here */ 197 NOTE("false alarm"); 198 start = m->coldp + 1; /* recycle starting later */ 199 assert(start <= stop); 200 } 201 202 /* fill in the details if requested */ 203 if (nmatch > 0) { 204 pmatch[0].rm_so = m->coldp - m->offp; 205 pmatch[0].rm_eo = endp - m->offp; 206 } 207 if (nmatch > 1) { 208 assert(m->pmatch != NULL); 209 for (i = 1; i < nmatch; i++) 210 if (i <= m->g->nsub) 211 pmatch[i] = m->pmatch[i]; 212 else { 213 pmatch[i].rm_so = -1; 214 pmatch[i].rm_eo = -1; 215 } 216 } 217 218 if (m->pmatch != NULL) 219 free((char *)m->pmatch); 220 if (m->lastpos != NULL) 221 free((char *)m->lastpos); 222 STATETEARDOWN(m); 223 return(0); 224 } 225 226 /* 227 - dissect - figure out what matched what, no back references 228 == static char *dissect(register struct match *m, char *start, \ 229 == char *stop, sopno startst, sopno stopst); 230 */ 231 static char * /* == stop (success) always */ 232 dissect(m, start, stop, startst, stopst) 233 register struct match *m; 234 char *start; 235 char *stop; 236 sopno startst; 237 sopno stopst; 238 { 239 register int i; 240 register sopno ss; /* start sop of current subRE */ 241 register sopno es; /* end sop of current subRE */ 242 register char *sp; /* start of string matched by it */ 243 register char *stp; /* string matched by it cannot pass here */ 244 register char *rest; /* start of rest of string */ 245 register char *tail; /* string unmatched by rest of RE */ 246 register sopno ssub; /* start sop of subsubRE */ 247 register sopno esub; /* end sop of subsubRE */ 248 register char *ssp; /* start of string matched by subsubRE */ 249 register char *sep; /* end of string matched by subsubRE */ 250 register char *oldssp; /* previous ssp */ 251 register char *dp; 252 253 AT("diss", start, stop, startst, stopst); 254 sp = start; 255 for (ss = startst; ss < stopst; ss = es) { 256 /* identify end of subRE */ 257 es = ss; 258 switch (OP(m->g->strip[es])) { 259 case OPLUS_: 260 case OQUEST_: 261 es += OPND(m->g->strip[es]); 262 break; 263 case OCH_: 264 while (OP(m->g->strip[es]) != O_CH) 265 es += OPND(m->g->strip[es]); 266 break; 267 } 268 es++; 269 270 /* figure out what it matched */ 271 switch (OP(m->g->strip[ss])) { 272 case OEND: 273 assert(nope); 274 break; 275 case OCHAR: 276 sp++; 277 break; 278 case OBOL: 279 case OEOL: 280 case OBOW: 281 case OEOW: 282 break; 283 case OANY: 284 case OANYOF: 285 sp++; 286 break; 287 case OBACK_: 288 case O_BACK: 289 assert(nope); 290 break; 291 /* cases where length of match is hard to find */ 292 case OQUEST_: 293 stp = stop; 294 for (;;) { 295 /* how long could this one be? */ 296 rest = slow(m, sp, stp, ss, es); 297 assert(rest != NULL); /* it did match */ 298 /* could the rest match the rest? */ 299 tail = slow(m, rest, stop, es, stopst); 300 if (tail == stop) 301 break; /* yes! */ 302 /* no -- try a shorter match for this one */ 303 stp = rest - 1; 304 assert(stp >= sp); /* it did work */ 305 } 306 ssub = ss + 1; 307 esub = es - 1; 308 /* did innards match? */ 309 if (slow(m, sp, rest, ssub, esub) != NULL) { 310 dp = dissect(m, sp, rest, ssub, esub); 311 assert(dp == rest); 312 } else /* no */ 313 assert(sp == rest); 314 sp = rest; 315 break; 316 case OPLUS_: 317 stp = stop; 318 for (;;) { 319 /* how long could this one be? */ 320 rest = slow(m, sp, stp, ss, es); 321 assert(rest != NULL); /* it did match */ 322 /* could the rest match the rest? */ 323 tail = slow(m, rest, stop, es, stopst); 324 if (tail == stop) 325 break; /* yes! */ 326 /* no -- try a shorter match for this one */ 327 stp = rest - 1; 328 assert(stp >= sp); /* it did work */ 329 } 330 ssub = ss + 1; 331 esub = es - 1; 332 ssp = sp; 333 oldssp = ssp; 334 for (;;) { /* find last match of innards */ 335 sep = slow(m, ssp, rest, ssub, esub); 336 if (sep == NULL || sep == ssp) 337 break; /* failed or matched null */ 338 oldssp = ssp; /* on to next try */ 339 ssp = sep; 340 } 341 if (sep == NULL) { 342 /* last successful match */ 343 sep = ssp; 344 ssp = oldssp; 345 } 346 assert(sep == rest); /* must exhaust substring */ 347 assert(slow(m, ssp, sep, ssub, esub) == rest); 348 dp = dissect(m, ssp, sep, ssub, esub); 349 assert(dp == sep); 350 sp = rest; 351 break; 352 case OCH_: 353 stp = stop; 354 for (;;) { 355 /* how long could this one be? */ 356 rest = slow(m, sp, stp, ss, es); 357 assert(rest != NULL); /* it did match */ 358 /* could the rest match the rest? */ 359 tail = slow(m, rest, stop, es, stopst); 360 if (tail == stop) 361 break; /* yes! */ 362 /* no -- try a shorter match for this one */ 363 stp = rest - 1; 364 assert(stp >= sp); /* it did work */ 365 } 366 ssub = ss + 1; 367 esub = ss + OPND(m->g->strip[ss]) - 1; 368 assert(OP(m->g->strip[esub]) == OOR1); 369 for (;;) { /* find first matching branch */ 370 if (slow(m, sp, rest, ssub, esub) == rest) 371 break; /* it matched all of it */ 372 /* that one missed, try next one */ 373 assert(OP(m->g->strip[esub]) == OOR1); 374 esub++; 375 assert(OP(m->g->strip[esub]) == OOR2); 376 ssub = esub + 1; 377 esub += OPND(m->g->strip[esub]); 378 if (OP(m->g->strip[esub]) == OOR2) 379 esub--; 380 else 381 assert(OP(m->g->strip[esub]) == O_CH); 382 } 383 dp = dissect(m, sp, rest, ssub, esub); 384 assert(dp == rest); 385 sp = rest; 386 break; 387 case O_PLUS: 388 case O_QUEST: 389 case OOR1: 390 case OOR2: 391 case O_CH: 392 assert(nope); 393 break; 394 case OLPAREN: 395 i = OPND(m->g->strip[ss]); 396 assert(0 < i && i <= m->g->nsub); 397 m->pmatch[i].rm_so = sp - m->offp; 398 break; 399 case ORPAREN: 400 i = OPND(m->g->strip[ss]); 401 assert(0 < i && i <= m->g->nsub); 402 m->pmatch[i].rm_eo = sp - m->offp; 403 break; 404 default: /* uh oh */ 405 assert(nope); 406 break; 407 } 408 } 409 410 assert(sp == stop); 411 return(sp); 412 } 413 414 /* 415 - backref - figure out what matched what, figuring in back references 416 == static char *backref(register struct match *m, char *start, \ 417 == char *stop, sopno startst, sopno stopst, sopno lev); 418 */ 419 static char * /* == stop (success) or NULL (failure) */ 420 backref(m, start, stop, startst, stopst, lev) 421 register struct match *m; 422 char *start; 423 char *stop; 424 sopno startst; 425 sopno stopst; 426 sopno lev; /* PLUS nesting level */ 427 { 428 register int i; 429 register sopno ss; /* start sop of current subRE */ 430 register char *sp; /* start of string matched by it */ 431 register sopno ssub; /* start sop of subsubRE */ 432 register sopno esub; /* end sop of subsubRE */ 433 register char *ssp; /* start of string matched by subsubRE */ 434 register char *dp; 435 register size_t len; 436 register int hard; 437 register sop s; 438 register regoff_t offsave; 439 register cset *cs; 440 441 AT("back", start, stop, startst, stopst); 442 sp = start; 443 444 /* get as far as we can with easy stuff */ 445 hard = 0; 446 for (ss = startst; !hard && ss < stopst; ss++) 447 switch (OP(s = m->g->strip[ss])) { 448 case OCHAR: 449 if (sp == stop || *sp++ != (char)OPND(s)) 450 return(NULL); 451 break; 452 case OANY: 453 if (sp == stop) 454 return(NULL); 455 sp++; 456 break; 457 case OANYOF: 458 cs = &m->g->sets[OPND(s)]; 459 if (sp == stop || !CHIN(cs, *sp++)) 460 return(NULL); 461 break; 462 case OBOL: 463 if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 464 (sp < m->endp && *(sp-1) == '\n' && 465 (m->g->cflags®_NEWLINE)) ) 466 { /* yes */ } 467 else 468 return(NULL); 469 break; 470 case OEOL: 471 if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 472 (sp < m->endp && *sp == '\n' && 473 (m->g->cflags®_NEWLINE)) ) 474 { /* yes */ } 475 else 476 return(NULL); 477 break; 478 case OBOW: 479 if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 480 (sp < m->endp && *(sp-1) == '\n' && 481 (m->g->cflags®_NEWLINE)) || 482 (sp > m->beginp && 483 !ISWORD(*(sp-1))) ) && 484 (sp < m->endp && ISWORD(*sp)) ) 485 { /* yes */ } 486 else 487 return(NULL); 488 break; 489 case OEOW: 490 if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || 491 (sp < m->endp && *sp == '\n' && 492 (m->g->cflags®_NEWLINE)) || 493 (sp < m->endp && !ISWORD(*sp)) ) && 494 (sp > m->beginp && ISWORD(*(sp-1))) ) 495 { /* yes */ } 496 else 497 return(NULL); 498 break; 499 case O_QUEST: 500 break; 501 case OOR1: /* matches null but needs to skip */ 502 ss++; 503 s = m->g->strip[ss]; 504 do { 505 assert(OP(s) == OOR2); 506 ss += OPND(s); 507 } while (OP(s = m->g->strip[ss]) != O_CH); 508 /* note that the ss++ gets us past the O_CH */ 509 break; 510 default: /* have to make a choice */ 511 hard = 1; 512 break; 513 } 514 if (!hard) { /* that was it! */ 515 if (sp != stop) 516 return(NULL); 517 return(sp); 518 } 519 ss--; /* adjust for the for's final increment */ 520 521 /* the hard stuff */ 522 AT("hard", sp, stop, ss, stopst); 523 s = m->g->strip[ss]; 524 switch (OP(s)) { 525 case OBACK_: /* the vilest depths */ 526 i = OPND(s); 527 assert(0 < i && i <= m->g->nsub); 528 if (m->pmatch[i].rm_eo == -1) 529 return(NULL); 530 assert(m->pmatch[i].rm_so != -1); 531 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 532 assert(stop - m->beginp >= len); 533 if (sp > stop - len) 534 return(NULL); /* not enough left to match */ 535 ssp = m->offp + m->pmatch[i].rm_so; 536 if (memcmp(sp, ssp, len) != 0) 537 return(NULL); 538 while (m->g->strip[ss] != SOP(O_BACK, i)) 539 ss++; 540 return(backref(m, sp+len, stop, ss+1, stopst, lev)); 541 break; 542 case OQUEST_: /* to null or not */ 543 dp = backref(m, sp, stop, ss+1, stopst, lev); 544 if (dp != NULL) 545 return(dp); /* not */ 546 return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 547 break; 548 case OPLUS_: 549 assert(m->lastpos != NULL); 550 assert(lev+1 <= m->g->nplus); 551 m->lastpos[lev+1] = sp; 552 return(backref(m, sp, stop, ss+1, stopst, lev+1)); 553 break; 554 case O_PLUS: 555 if (sp == m->lastpos[lev]) /* last pass matched null */ 556 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 557 /* try another pass */ 558 m->lastpos[lev] = sp; 559 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 560 if (dp == NULL) 561 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 562 else 563 return(dp); 564 break; 565 case OCH_: /* find the right one, if any */ 566 ssub = ss + 1; 567 esub = ss + OPND(s) - 1; 568 assert(OP(m->g->strip[esub]) == OOR1); 569 for (;;) { /* find first matching branch */ 570 dp = backref(m, sp, stop, ssub, esub, lev); 571 if (dp != NULL) 572 return(dp); 573 /* that one missed, try next one */ 574 if (OP(m->g->strip[esub]) == O_CH) 575 return(NULL); /* there is none */ 576 esub++; 577 assert(OP(m->g->strip[esub]) == OOR2); 578 ssub = esub + 1; 579 esub += OPND(m->g->strip[esub]); 580 if (OP(m->g->strip[esub]) == OOR2) 581 esub--; 582 else 583 assert(OP(m->g->strip[esub]) == O_CH); 584 } 585 break; 586 case OLPAREN: /* must undo assignment if rest fails */ 587 i = OPND(s); 588 assert(0 < i && i <= m->g->nsub); 589 offsave = m->pmatch[i].rm_so; 590 m->pmatch[i].rm_so = sp - m->offp; 591 dp = backref(m, sp, stop, ss+1, stopst, lev); 592 if (dp != NULL) 593 return(dp); 594 m->pmatch[i].rm_so = offsave; 595 return(NULL); 596 break; 597 case ORPAREN: /* must undo assignment if rest fails */ 598 i = OPND(s); 599 assert(0 < i && i <= m->g->nsub); 600 offsave = m->pmatch[i].rm_eo; 601 m->pmatch[i].rm_eo = sp - m->offp; 602 dp = backref(m, sp, stop, ss+1, stopst, lev); 603 if (dp != NULL) 604 return(dp); 605 m->pmatch[i].rm_eo = offsave; 606 return(NULL); 607 break; 608 default: /* uh oh */ 609 assert(nope); 610 break; 611 } 612 613 /* "can't happen" */ 614 assert(nope); 615 /* NOTREACHED */ 616 } 617 618 /* 619 - fast - step through the string at top speed 620 == static char *fast(register struct match *m, char *start, \ 621 == char *stop, sopno startst, sopno stopst); 622 */ 623 static char * /* where tentative match ended, or NULL */ 624 fast(m, start, stop, startst, stopst) 625 register struct match *m; 626 char *start; 627 char *stop; 628 sopno startst; 629 sopno stopst; 630 { 631 register states st = m->st; 632 register states fresh = m->fresh; 633 register states tmp = m->tmp; 634 register char *p = start; 635 register int c = (start == m->beginp) ? OUT : *(start-1); 636 register int lastc; /* previous c */ 637 register int flagch; 638 register int i; 639 register char *coldp; /* last p after which no match was underway */ 640 641 CLEAR(st); 642 SET1(st, startst); 643 st = step(m->g, startst, stopst, st, NOTHING, st); 644 ASSIGN(fresh, st); 645 SP("start", st, *p); 646 coldp = NULL; 647 for (;;) { 648 /* next character */ 649 lastc = c; 650 c = (p == m->endp) ? OUT : *p; 651 if (EQ(st, fresh)) 652 coldp = p; 653 654 /* is there an EOL and/or BOL between lastc and c? */ 655 flagch = '\0'; 656 i = 0; 657 if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 658 (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 659 flagch = BOL; 660 i = m->g->nbol; 661 } 662 if ( (c == '\n' && m->g->cflags®_NEWLINE) || 663 (c == OUT && !(m->eflags®_NOTEOL)) ) { 664 flagch = (flagch == BOL) ? BOLEOL : EOL; 665 i += m->g->neol; 666 } 667 if (i != 0) { 668 for (; i > 0; i--) 669 st = step(m->g, startst, stopst, st, flagch, st); 670 SP("boleol", st, c); 671 } 672 673 /* how about a word boundary? */ 674 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 675 (c != OUT && ISWORD(c)) ) { 676 flagch = BOW; 677 } 678 if ( (lastc != OUT && ISWORD(lastc)) && 679 (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 680 flagch = EOW; 681 } 682 if (flagch == BOW || flagch == EOW) { 683 st = step(m->g, startst, stopst, st, flagch, st); 684 SP("boweow", st, c); 685 } 686 687 /* are we done? */ 688 if (ISSET(st, stopst) || p == stop) 689 break; /* NOTE BREAK OUT */ 690 691 /* no, we must deal with this character */ 692 ASSIGN(tmp, st); 693 ASSIGN(st, fresh); 694 assert(c != OUT); 695 st = step(m->g, startst, stopst, tmp, c, st); 696 SP("aft", st, c); 697 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 698 p++; 699 } 700 701 assert(coldp != NULL); 702 m->coldp = coldp; 703 if (ISSET(st, stopst)) 704 return(p+1); 705 else 706 return(NULL); 707 } 708 709 /* 710 - slow - step through the string more deliberately 711 == static char *slow(register struct match *m, char *start, \ 712 == char *stop, sopno startst, sopno stopst); 713 */ 714 static char * /* where it ended */ 715 slow(m, start, stop, startst, stopst) 716 register struct match *m; 717 char *start; 718 char *stop; 719 sopno startst; 720 sopno stopst; 721 { 722 register states st = m->st; 723 register states empty = m->empty; 724 register states tmp = m->tmp; 725 register char *p = start; 726 register int c = (start == m->beginp) ? OUT : *(start-1); 727 register int lastc; /* previous c */ 728 register int flagch; 729 register int i; 730 register char *matchp; /* last p at which a match ended */ 731 732 AT("slow", start, stop, startst, stopst); 733 CLEAR(st); 734 SET1(st, startst); 735 SP("sstart", st, *p); 736 st = step(m->g, startst, stopst, st, NOTHING, st); 737 matchp = NULL; 738 for (;;) { 739 /* next character */ 740 lastc = c; 741 c = (p == m->endp) ? OUT : *p; 742 743 /* is there an EOL and/or BOL between lastc and c? */ 744 flagch = '\0'; 745 i = 0; 746 if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 747 (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 748 flagch = BOL; 749 i = m->g->nbol; 750 } 751 if ( (c == '\n' && m->g->cflags®_NEWLINE) || 752 (c == OUT && !(m->eflags®_NOTEOL)) ) { 753 flagch = (flagch == BOL) ? BOLEOL : EOL; 754 i += m->g->neol; 755 } 756 if (i != 0) { 757 for (; i > 0; i--) 758 st = step(m->g, startst, stopst, st, flagch, st); 759 SP("sboleol", st, c); 760 } 761 762 /* how about a word boundary? */ 763 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 764 (c != OUT && ISWORD(c)) ) { 765 flagch = BOW; 766 } 767 if ( (lastc != OUT && ISWORD(lastc)) && 768 (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 769 flagch = EOW; 770 } 771 if (flagch == BOW || flagch == EOW) { 772 st = step(m->g, startst, stopst, st, flagch, st); 773 SP("sboweow", st, c); 774 } 775 776 /* are we done? */ 777 if (ISSET(st, stopst)) 778 matchp = p; 779 if (EQ(st, empty) || p == stop) 780 break; /* NOTE BREAK OUT */ 781 782 /* no, we must deal with this character */ 783 ASSIGN(tmp, st); 784 ASSIGN(st, empty); 785 assert(c != OUT); 786 st = step(m->g, startst, stopst, tmp, c, st); 787 SP("saft", st, c); 788 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 789 p++; 790 } 791 792 return(matchp); 793 } 794 795 796 /* 797 - step - map set of states reachable before char to set reachable after 798 == static states step(register struct re_guts *g, sopno start, sopno stop, \ 799 == register states bef, int ch, register states aft); 800 == #define BOL (OUT+1) 801 == #define EOL (BOL+1) 802 == #define BOLEOL (BOL+2) 803 == #define NOTHING (BOL+3) 804 == #define BOW (BOL+4) 805 == #define EOW (BOL+5) 806 == #define CODEMAX (BOL+5) // highest code used 807 == #define NONCHAR(c) ((c) > CHAR_MAX) 808 == #define NNONCHAR (CODEMAX-CHAR_MAX) 809 */ 810 static states 811 step(g, start, stop, bef, ch, aft) 812 register struct re_guts *g; 813 sopno start; /* start state within strip */ 814 sopno stop; /* state after stop state within strip */ 815 register states bef; /* states reachable before */ 816 int ch; /* character or NONCHAR code */ 817 register states aft; /* states already known reachable after */ 818 { 819 register cset *cs; 820 register sop s; 821 register sopno pc; 822 register onestate here; /* note, macros know this name */ 823 register sopno look; 824 register int i; 825 826 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 827 s = g->strip[pc]; 828 switch (OP(s)) { 829 case OEND: 830 assert(pc == stop-1); 831 break; 832 case OCHAR: 833 /* only characters can match */ 834 assert(!NONCHAR(ch) || ch != (char)OPND(s)); 835 if (ch == (char)OPND(s)) 836 FWD(aft, bef, 1); 837 break; 838 case OBOL: 839 if (ch == BOL || ch == BOLEOL) 840 FWD(aft, bef, 1); 841 break; 842 case OEOL: 843 if (ch == EOL || ch == BOLEOL) 844 FWD(aft, bef, 1); 845 break; 846 case OBOW: 847 if (ch == BOW) 848 FWD(aft, bef, 1); 849 break; 850 case OEOW: 851 if (ch == EOW) 852 FWD(aft, bef, 1); 853 break; 854 case OANY: 855 if (!NONCHAR(ch)) 856 FWD(aft, bef, 1); 857 break; 858 case OANYOF: 859 cs = &g->sets[OPND(s)]; 860 if (!NONCHAR(ch) && CHIN(cs, ch)) 861 FWD(aft, bef, 1); 862 break; 863 case OBACK_: /* ignored here */ 864 case O_BACK: 865 FWD(aft, aft, 1); 866 break; 867 case OPLUS_: /* forward, this is just an empty */ 868 FWD(aft, aft, 1); 869 break; 870 case O_PLUS: /* both forward and back */ 871 FWD(aft, aft, 1); 872 i = ISSETBACK(aft, OPND(s)); 873 BACK(aft, aft, OPND(s)); 874 if (!i && ISSETBACK(aft, OPND(s))) { 875 /* oho, must reconsider loop body */ 876 pc -= OPND(s) + 1; 877 INIT(here, pc); 878 } 879 break; 880 case OQUEST_: /* two branches, both forward */ 881 FWD(aft, aft, 1); 882 FWD(aft, aft, OPND(s)); 883 break; 884 case O_QUEST: /* just an empty */ 885 FWD(aft, aft, 1); 886 break; 887 case OLPAREN: /* not significant here */ 888 case ORPAREN: 889 FWD(aft, aft, 1); 890 break; 891 case OCH_: /* mark the first two branches */ 892 FWD(aft, aft, 1); 893 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 894 FWD(aft, aft, OPND(s)); 895 break; 896 case OOR1: /* done a branch, find the O_CH */ 897 if (ISSTATEIN(aft, here)) { 898 for (look = 1; 899 OP(s = g->strip[pc+look]) != O_CH; 900 look += OPND(s)) 901 assert(OP(s) == OOR2); 902 FWD(aft, aft, look); 903 } 904 break; 905 case OOR2: /* propagate OCH_'s marking */ 906 FWD(aft, aft, 1); 907 if (OP(g->strip[pc+OPND(s)]) != O_CH) { 908 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 909 FWD(aft, aft, OPND(s)); 910 } 911 break; 912 case O_CH: /* just empty */ 913 FWD(aft, aft, 1); 914 break; 915 default: /* ooooops... */ 916 assert(nope); 917 break; 918 } 919 } 920 921 return(aft); 922 } 923 924 #ifdef REDEBUG 925 /* 926 - print - print a set of states 927 == #ifdef REDEBUG 928 == static void print(struct match *m, char *caption, states st, \ 929 == int ch, FILE *d); 930 == #endif 931 */ 932 static void 933 print(m, caption, st, ch, d) 934 struct match *m; 935 char *caption; 936 states st; 937 int ch; 938 FILE *d; 939 { 940 register struct re_guts *g = m->g; 941 register int i; 942 register int first = 1; 943 944 if (!(m->eflags®_TRACE)) 945 return; 946 947 fprintf(d, "%s", caption); 948 if (ch != '\0') 949 fprintf(d, " %s", pchar(ch)); 950 for (i = 0; i < g->nstates; i++) 951 if (ISSET(st, i)) { 952 fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 953 first = 0; 954 } 955 fprintf(d, "\n"); 956 } 957 958 /* 959 - at - print current situation 960 == #ifdef REDEBUG 961 == static void at(struct match *m, char *title, char *start, char *stop, \ 962 == sopno startst, sopno stopst); 963 == #endif 964 */ 965 static void 966 at(m, title, start, stop, startst, stopst) 967 struct match *m; 968 char *title; 969 char *start; 970 char *stop; 971 sopno startst; 972 sopno stopst; 973 { 974 if (!(m->eflags®_TRACE)) 975 return; 976 977 printf("%s %s-", title, pchar(*start)); 978 printf("%s ", pchar(*stop)); 979 printf("%ld-%ld\n", (long)startst, (long)stopst); 980 } 981 982 #ifndef PCHARDONE 983 #define PCHARDONE /* never again */ 984 /* 985 - pchar - make a character printable 986 == #ifdef REDEBUG 987 == static char *pchar(int ch); 988 == #endif 989 * 990 * Is this identical to regchar() over in debug.c? Well, yes. But a 991 * duplicate here avoids having a debugging-capable regexec.o tied to 992 * a matching debug.o, and this is convenient. It all disappears in 993 * the non-debug compilation anyway, so it doesn't matter much. 994 */ 995 static char * /* -> representation */ 996 pchar(ch) 997 int ch; 998 { 999 static char pbuf[10]; 1000 1001 if (isprint(ch) || ch == ' ') 1002 sprintf(pbuf, "%c", ch); 1003 else 1004 sprintf(pbuf, "\\%o", ch); 1005 return(pbuf); 1006 } 1007 #endif 1008 #endif 1009 1010 #undef matcher 1011 #undef fast 1012 #undef slow 1013 #undef dissect 1014 #undef backref 1015 #undef step 1016 #undef print 1017 #undef at 1018 #undef match 1019