1 /*- 2 * Copyright (c) 1992 Henry Spencer. 3 * Copyright (c) 1992 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Henry Spencer of the University of Toronto. 8 * 9 * %sccs.include.redist.c% 10 * 11 * @(#)engine.c 5.1 (Berkeley) 08/06/92 12 */ 13 14 /* 15 * The matching engine and friends. This file is #included by regexec.c 16 * after suitable #defines of a variety of macros used herein, so that 17 * different state representations can be used without duplicating masses 18 * of code. 19 */ 20 21 #ifdef SNAMES 22 #define matcher smatcher 23 #define fast sfast 24 #define slow sslow 25 #define dissect sdissect 26 #define backref sbackref 27 #define expand sexpand 28 #define step sstep 29 #define print sprint 30 #define match smat 31 #endif 32 #ifdef LNAMES 33 #define matcher lmatcher 34 #define fast lfast 35 #define slow lslow 36 #define dissect ldissect 37 #define backref lbackref 38 #define expand lexpand 39 #define step lstep 40 #define print lprint 41 #define match lmat 42 #endif 43 44 /* another structure passed up and down to avoid zillions of parameters */ 45 struct match { 46 struct re_guts *g; 47 int eflags; 48 regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 49 uchar *offp; /* offsets work from here */ 50 uchar *beginp; /* start of string -- virtual NUL precedes */ 51 uchar *endp; /* end of string -- virtual NUL here */ 52 uchar *coldp; /* can be no match starting before here */ 53 uchar **lastpos; /* [nplus+1] */ 54 STATEVARS; 55 states st; /* current states */ 56 states fresh; /* states for a fresh start */ 57 states tmp; /* temporary */ 58 states empty; /* empty set of states */ 59 }; 60 61 #ifndef NDEBUG 62 STATIC void print(); 63 extern char *regchar(); 64 #define SP(t, s, c) { if (m->eflags®_TRACE) print(m->g, t, s, c, stdout); } 65 #define DO(t, p1, p2, s1, s2) { if (m->eflags®_TRACE) { \ 66 printf("%s %s-", t, regchar(*(p1))); \ 67 printf("%s ", regchar(*(p2))); \ 68 printf("%ld-%ld\n", s1, s2); } } 69 #else 70 #define SP(t, s, c) /* nothing */ 71 #define DO(t, p1, p2, s1, s2) /* nothing */ 72 #endif 73 74 STATIC uchar *fast(); 75 STATIC uchar *slow(); 76 STATIC uchar *dissect(); 77 STATIC uchar *backref(); 78 STATIC states expand(); 79 STATIC states step(); 80 81 /* 82 - matcher - the actual matching engine 83 */ 84 static int /* 0 success, REG_NOMATCH failure */ 85 matcher(g, string, nmatch, pmatch, eflags) 86 register struct re_guts *g; 87 uchar *string; 88 size_t nmatch; 89 regmatch_t pmatch[]; 90 int eflags; 91 { 92 register uchar *endp; 93 register int i; 94 struct match mv; 95 register struct match *m = &mv; 96 register uchar *dp; 97 const register sopno gf = g->firststate+1; /* +1 for OEND */ 98 const register sopno gl = g->laststate; 99 uchar *start; 100 uchar *stop; 101 102 if (g->cflags®_NOSUB) 103 nmatch = 0; /* simplify tests */ 104 if (eflags®_STARTEND) { 105 start = string + pmatch[0].rm_so; 106 stop = string + pmatch[0].rm_eo; 107 } else { 108 start = string; 109 stop = start + strlen((char *)start); 110 } 111 112 /* match struct setup */ 113 m->g = g; 114 m->eflags = eflags; 115 m->pmatch = NULL; 116 m->lastpos = NULL; 117 m->offp = string; 118 m->beginp = start; 119 m->endp = stop; 120 STATESETUP(m, 4); 121 SETUP(m->st); 122 SETUP(m->fresh); 123 SETUP(m->tmp); 124 SETUP(m->empty); 125 CLEAR(m->empty); 126 127 /* this loop does only one repetition except for backrefs */ 128 for (;;) { 129 endp = fast(m, start, stop, gf, gl); 130 if (endp == NULL) { /* a miss */ 131 STATETEARDOWN(m); 132 return(REG_NOMATCH); 133 } 134 if (nmatch == 0 && !g->backrefs) 135 break; /* no further info needed */ 136 137 /* where? */ 138 assert(m->coldp != NULL); 139 for (;;) { 140 endp = slow(m, m->coldp, stop, gf, gl); 141 if (endp != NULL) 142 break; 143 assert(*m->coldp != '\0'); 144 m->coldp++; 145 } 146 if (nmatch == 1 && !g->backrefs) 147 break; /* no further info needed */ 148 149 /* oh my, he wants the subexpressions... */ 150 if (m->pmatch == NULL) 151 m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 152 sizeof(regmatch_t)); 153 if (m->pmatch == NULL) { 154 STATETEARDOWN(m); 155 return(REG_ESPACE); 156 } 157 for (i = 1; i <= m->g->nsub; i++) { 158 m->pmatch[i].rm_so = -1; 159 m->pmatch[i].rm_eo = -1; 160 } 161 if (!g->backrefs) 162 dp = dissect(m, m->coldp, endp, gf, gl); 163 else { 164 if (g->nplus > 0 && m->lastpos == NULL) 165 m->lastpos = (uchar **)malloc((g->nplus+1) * 166 sizeof(uchar *)); 167 if (g->nplus > 0 && m->lastpos == NULL) { 168 free(m->pmatch); 169 STATETEARDOWN(m); 170 return(REG_ESPACE); 171 } 172 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 173 } 174 if (dp != NULL) 175 break; 176 177 /* uh-oh... we couldn't find a subexpression-level match */ 178 assert(g->backrefs); /* must be back references doing it */ 179 assert(g->nplus == 0 || m->lastpos != NULL); 180 while (dp == NULL && endp > m->coldp && 181 (endp = slow(m, m->coldp, endp-1, gf, gl)) != NULL) { 182 /* try it on a shorter possibility */ 183 for (i = 1; i <= m->g->nsub; i++) { 184 m->pmatch[i].rm_so = -1; 185 m->pmatch[i].rm_eo = -1; 186 } 187 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 188 } 189 assert(dp == NULL || dp == endp); 190 if (dp != NULL) /* found a shorter one */ 191 break; 192 193 /* despite initial appearances, there is no match here */ 194 start = m->coldp + 1; /* recycle starting later */ 195 assert(start <= stop); 196 } 197 198 /* fill in the details if requested */ 199 if (nmatch > 0) { 200 pmatch[0].rm_so = m->coldp - m->offp; 201 pmatch[0].rm_eo = endp - m->offp; 202 } 203 if (nmatch > 1) { 204 assert(m->pmatch != NULL); 205 for (i = 1; i < nmatch; i++) 206 if (i <= m->g->nsub) 207 pmatch[i] = m->pmatch[i]; 208 else { 209 pmatch[i].rm_so = -1; 210 pmatch[i].rm_eo = -1; 211 } 212 } 213 214 if (m->pmatch != NULL) 215 free((char *)m->pmatch); 216 if (m->lastpos != NULL) 217 free((char *)m->lastpos); 218 STATETEARDOWN(m); 219 return(0); 220 } 221 222 /* 223 - dissect - figure out what matched what, no back references 224 */ 225 static uchar * /* == stop (success) always */ 226 dissect(m, start, stop, startst, stopst) 227 register struct match *m; 228 uchar *start; 229 uchar *stop; 230 sopno startst; 231 sopno stopst; 232 { 233 register int i; 234 register sopno ss; /* start sop of current subRE */ 235 register sopno es; /* end sop of current subRE */ 236 register uchar *sp; /* start of string matched by it */ 237 register uchar *stp; /* string matched by it cannot pass here */ 238 register uchar *rest; /* start of rest of string */ 239 register uchar *tail; /* string unmatched by rest of RE */ 240 register sopno ssub; /* start sop of subsubRE */ 241 register sopno esub; /* end sop of subsubRE */ 242 register uchar *ssp; /* start of string matched by subsubRE */ 243 register uchar *sep; /* end of string matched by subsubRE */ 244 register uchar *oldssp; /* previous ssp */ 245 register uchar *dp; 246 register size_t len; 247 248 DO("diss", start, stop, startst, stopst); 249 sp = start; 250 for (ss = startst; ss < stopst; ss = es) { 251 /* identify end of subRE */ 252 es = ss; 253 switch (OP(m->g->strip[es])) { 254 case OPLUS_: 255 case OQUEST_: 256 es += OPND(m->g->strip[es]); 257 break; 258 case OCH_: 259 while (OP(m->g->strip[es]) != O_CH) 260 es += OPND(m->g->strip[es]); 261 break; 262 } 263 es++; 264 265 /* figure out what it matched */ 266 switch (OP(m->g->strip[ss])) { 267 case OEND: 268 assert(0); 269 break; 270 case OCHAR: 271 sp++; 272 break; 273 case OBOL: 274 case OEOL: 275 break; 276 case OANY: 277 case OANYOF: 278 sp++; 279 break; 280 case OBACK_: 281 case O_BACK: 282 assert(0); 283 break; 284 /* cases where length of match is hard to find */ 285 case OQUEST_: 286 stp = stop; 287 for (;;) { 288 /* how long could this one be? */ 289 rest = slow(m, sp, stp, ss, es); 290 assert(rest != NULL); /* it did match */ 291 /* could the rest match the rest? */ 292 tail = slow(m, rest, stop, es, stopst); 293 if (tail == stop) 294 break; /* yes! */ 295 /* no -- try a shorter match for this one */ 296 stp = rest - 1; 297 assert(stp >= sp); /* it did work */ 298 } 299 ssub = ss + 1; 300 esub = es - 1; 301 /* did innards match? */ 302 if (slow(m, sp, rest, ssub, esub) != NULL) { 303 dp = dissect(m, sp, rest, ssub, esub); 304 assert(dp == rest); 305 } else /* no */ 306 assert(sp == rest); 307 sp = rest; 308 break; 309 case OPLUS_: 310 stp = stop; 311 for (;;) { 312 /* how long could this one be? */ 313 rest = slow(m, sp, stp, ss, es); 314 assert(rest != NULL); /* it did match */ 315 /* could the rest match the rest? */ 316 tail = slow(m, rest, stop, es, stopst); 317 if (tail == stop) 318 break; /* yes! */ 319 /* no -- try a shorter match for this one */ 320 stp = rest - 1; 321 assert(stp >= sp); /* it did work */ 322 } 323 ssub = ss + 1; 324 esub = es - 1; 325 ssp = sp; 326 oldssp = ssp; 327 for (;;) { /* find last match of innards */ 328 sep = slow(m, ssp, rest, ssub, esub); 329 if (sep == NULL || sep == ssp) 330 break; /* failed or matched null */ 331 oldssp = ssp; /* on to next try */ 332 ssp = sep; 333 } 334 if (sep == NULL) { 335 /* last successful match */ 336 sep = ssp; 337 ssp = oldssp; 338 } 339 assert(sep == rest); /* must exhaust substring */ 340 assert(slow(m, ssp, sep, ssub, esub) == rest); 341 dp = dissect(m, ssp, sep, ssub, esub); 342 assert(dp == sep); 343 sp = rest; 344 break; 345 case OCH_: 346 stp = stop; 347 for (;;) { 348 /* how long could this one be? */ 349 rest = slow(m, sp, stp, ss, es); 350 assert(rest != NULL); /* it did match */ 351 /* could the rest match the rest? */ 352 tail = slow(m, rest, stop, es, stopst); 353 if (tail == stop) 354 break; /* yes! */ 355 /* no -- try a shorter match for this one */ 356 stp = rest - 1; 357 assert(stp >= sp); /* it did work */ 358 } 359 ssub = ss + 1; 360 esub = ss + OPND(m->g->strip[ss]) - 1; 361 assert(OP(m->g->strip[esub]) == OOR1); 362 for (;;) { /* find first matching branch */ 363 if (slow(m, sp, rest, ssub, esub) == rest) 364 break; /* it matched all of it */ 365 /* that one missed, try next one */ 366 assert(OP(m->g->strip[esub]) == OOR1); 367 esub++; 368 assert(OP(m->g->strip[esub]) == OOR2); 369 ssub = esub + 1; 370 esub += OPND(m->g->strip[esub]); 371 if (OP(m->g->strip[esub]) == OOR2) 372 esub--; 373 else 374 assert(OP(m->g->strip[esub]) == O_CH); 375 } 376 dp = dissect(m, sp, rest, ssub, esub); 377 assert(dp == rest); 378 sp = rest; 379 break; 380 case O_PLUS: 381 case O_QUEST: 382 case OOR1: 383 case OOR2: 384 case O_CH: 385 assert(0); 386 break; 387 case OLPAREN: 388 i = OPND(m->g->strip[ss]); 389 m->pmatch[i].rm_so = sp - m->offp; 390 break; 391 case ORPAREN: 392 i = OPND(m->g->strip[ss]); 393 m->pmatch[i].rm_eo = sp - m->offp; 394 break; 395 default: /* uh oh */ 396 assert(0); 397 break; 398 } 399 } 400 401 assert(sp == stop); 402 return(sp); 403 } 404 405 /* 406 - backref - figure out what matched what, figuring in back references 407 */ 408 static uchar * /* == stop (success) or NULL (failure) */ 409 backref(m, start, stop, startst, stopst, lev) 410 register struct match *m; 411 uchar *start; 412 uchar *stop; 413 sopno startst; 414 sopno stopst; 415 sopno lev; /* PLUS nesting level */ 416 { 417 register int i; 418 register sopno ss; /* start sop of current subRE */ 419 register uchar *sp; /* start of string matched by it */ 420 register sopno ssub; /* start sop of subsubRE */ 421 register sopno esub; /* end sop of subsubRE */ 422 register uchar *ssp; /* start of string matched by subsubRE */ 423 register uchar *dp; 424 register size_t len; 425 register int hard; 426 register sop s; 427 register regoff_t offsave; 428 register cset *cs; 429 430 DO("back", start, stop, startst, stopst); 431 sp = start; 432 433 /* get as far as we can with easy stuff */ 434 hard = 0; 435 for (ss = startst; !hard && ss < stopst; ss++) 436 switch (OP(s = m->g->strip[ss])) { 437 case OCHAR: 438 if (sp == stop || *sp++ != OPND(s)) 439 return(NULL); 440 break; 441 case OANY: 442 if (sp == stop) 443 return(NULL); 444 sp++; 445 break; 446 case OANYOF: 447 cs = &m->g->sets[OPND(s)]; 448 if (sp == stop || !CHIN(cs, *sp++)) 449 return(NULL); 450 break; 451 case OBOL: 452 if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 453 (sp < m->endp && *(sp-1) == '\n' && 454 (m->g->cflags®_NEWLINE)) ) 455 { /* yes */ } 456 else 457 return(NULL); 458 break; 459 case OEOL: 460 if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 461 (sp < m->endp && *sp == '\n' && 462 (m->g->cflags®_NEWLINE)) ) 463 { /* yes */ } 464 else 465 return(NULL); 466 break; 467 case O_QUEST: 468 break; 469 case OOR1: /* matches null but needs to skip */ 470 ss++; 471 s = m->g->strip[ss]; 472 do { 473 assert(OP(s) == OOR2); 474 ss += OPND(s); 475 } while (OP(s = m->g->strip[ss]) != O_CH); 476 /* note that the ss++ gets us past the O_CH */ 477 break; 478 default: /* have to make a choice */ 479 hard = 1; 480 break; 481 } 482 if (!hard) { /* that was it! */ 483 if (sp != stop) 484 return(NULL); 485 return(sp); 486 } 487 ss--; /* adjust for the for's final increment */ 488 489 /* the hard stuff */ 490 DO("hard", sp, stop, ss, stopst); 491 s = m->g->strip[ss]; 492 switch (OP(s)) { 493 case OBACK_: /* the vilest depths */ 494 i = OPND(s); 495 if (m->pmatch[i].rm_eo == -1) 496 return(NULL); 497 assert(m->pmatch[i].rm_so != -1); 498 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 499 assert(stop - m->beginp >= len); 500 if (sp > stop - len) 501 return(NULL); /* not enough left to match */ 502 ssp = m->offp + m->pmatch[i].rm_so; 503 if (strncmp((char *)sp, (char *)ssp, len) != 0) 504 return(NULL); 505 while (m->g->strip[ss] != SOP(O_BACK, i)) 506 ss++; 507 return(backref(m, sp+len, stop, ss+1, stopst, lev)); 508 break; 509 case OQUEST_: /* to null or not */ 510 dp = backref(m, sp, stop, ss+1, stopst, lev); 511 if (dp != NULL) 512 return(dp); /* not */ 513 return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 514 break; 515 case OPLUS_: 516 assert(m->lastpos != NULL); 517 assert(lev+1 <= m->g->nplus); 518 m->lastpos[lev+1] = sp; 519 return(backref(m, sp, stop, ss+1, stopst, lev+1)); 520 break; 521 case O_PLUS: 522 if (sp == m->lastpos[lev]) /* last pass matched null */ 523 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 524 /* try another pass */ 525 m->lastpos[lev] = sp; 526 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 527 if (dp == NULL) 528 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 529 else 530 return(dp); 531 break; 532 case OCH_: /* find the right one, if any */ 533 ssub = ss + 1; 534 esub = ss + OPND(s) - 1; 535 assert(OP(m->g->strip[esub]) == OOR1); 536 for (;;) { /* find first matching branch */ 537 dp = backref(m, sp, stop, ssub, esub, lev); 538 if (dp != NULL) 539 return(dp); 540 /* that one missed, try next one */ 541 if (OP(m->g->strip[esub]) == O_CH) 542 return(NULL); /* there is none */ 543 esub++; 544 assert(OP(m->g->strip[esub]) == OOR2); 545 ssub = esub + 1; 546 esub += OPND(m->g->strip[esub]); 547 if (OP(m->g->strip[esub]) == OOR2) 548 esub--; 549 else 550 assert(OP(m->g->strip[esub]) == O_CH); 551 } 552 break; 553 case OLPAREN: /* must undo assignment if rest fails */ 554 i = OPND(s); 555 offsave = m->pmatch[i].rm_so; 556 m->pmatch[i].rm_so = sp - m->offp; 557 dp = backref(m, sp, stop, ss+1, stopst, lev); 558 if (dp != NULL) 559 return(dp); 560 m->pmatch[i].rm_so = offsave; 561 return(NULL); 562 break; 563 case ORPAREN: /* must undo assignment if rest fails */ 564 i = OPND(s); 565 offsave = m->pmatch[i].rm_eo; 566 m->pmatch[i].rm_eo = sp - m->offp; 567 dp = backref(m, sp, stop, ss+1, stopst, lev); 568 if (dp != NULL) 569 return(dp); 570 m->pmatch[i].rm_eo = offsave; 571 return(NULL); 572 break; 573 default: /* uh oh */ 574 assert(0); 575 break; 576 } 577 578 /* "can't happen" */ 579 assert(0); 580 /* NOTREACHED */ 581 } 582 583 /* 584 - fast - step through the string at top speed 585 */ 586 static uchar * /* where tentative match ended, or NULL */ 587 fast(m, start, stop, startst, stopst) 588 register struct match *m; 589 uchar *start; 590 uchar *stop; 591 sopno startst; 592 sopno stopst; 593 { 594 register states st = m->st; 595 register states fresh = m->fresh; 596 register states tmp = m->tmp; 597 register uchar *p = start; 598 register uchar c; 599 register uchar lastc; /* previous c */ 600 register int atbol; 601 register int ateol; 602 register uchar *coldp; /* last p after which no match was underway */ 603 604 CLEAR(st); 605 SET1(st, startst); 606 st = expand(m->g, startst, stopst, st, 0, 0); 607 ASSIGN(fresh, st); 608 SP("start", st, *p); 609 c = '\0'; 610 coldp = NULL; 611 for (;;) { 612 /* next character */ 613 lastc = c; 614 c = (p == m->endp) ? '\0' : *p; 615 if (EQ(st, fresh)) 616 coldp = p; 617 618 /* is there an EOL and/or BOL between lastc and c? */ 619 atbol = ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 620 (lastc == '\0' && !(m->eflags®_NOTBOL)) ); 621 ateol = ( (c == '\n' && m->g->cflags®_NEWLINE) || 622 (c == '\0' && !(m->eflags®_NOTEOL)) ); 623 if (atbol || ateol) { 624 st = expand(m->g, startst, stopst, st, atbol, ateol); 625 SP("bef", st, c); 626 } 627 628 /* are we done? */ 629 if (ISSET(st, stopst) || p == stop) 630 break; /* NOTE BREAK OUT */ 631 632 /* no, we must deal with this character */ 633 ASSIGN(tmp, st); 634 ASSIGN(st, fresh); 635 st = step(m->g, startst, stopst, tmp, c, st); 636 SP("aft", st, c); 637 assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st)); 638 p++; 639 } 640 641 assert(coldp != NULL); 642 m->coldp = coldp; 643 if (ISSET(st, stopst)) 644 return(p+1); 645 else 646 return(NULL); 647 } 648 649 /* 650 - slow - step through the string more deliberately 651 */ 652 static uchar * /* where it ended */ 653 slow(m, start, stop, startst, stopst) 654 register struct match *m; 655 uchar *start; 656 uchar *stop; 657 sopno startst; 658 sopno stopst; 659 { 660 register states st = m->st; 661 register states empty = m->empty; 662 register states tmp = m->tmp; 663 register uchar *p = start; 664 register uchar c = (start == m->beginp) ? '\0' : *(start-1); 665 register uchar lastc; /* previous c */ 666 register int atbol; 667 register int ateol; 668 register uchar *matchp; /* last p at which a match ended */ 669 670 DO("slow", start, stop, startst, stopst); 671 CLEAR(st); 672 SET1(st, startst); 673 SP("sstart", st, *p); 674 matchp = NULL; 675 for (;;) { 676 /* next character */ 677 lastc = c; 678 c = (p == m->endp) ? '\0' : *p; 679 680 /* is there an EOL and/or BOL between lastc and c? */ 681 atbol = ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 682 (lastc == '\0' && !(m->eflags®_NOTBOL)) ); 683 ateol = ( (c == '\n' && m->g->cflags®_NEWLINE) || 684 (c == '\0' && !(m->eflags®_NOTEOL)) ); 685 686 /* do we need an expansion before looking at the char? */ 687 if (p == start || atbol || ateol) { 688 st = expand(m->g, startst, stopst, st, atbol, ateol); 689 SP("sbef", st, c); 690 } 691 692 /* are we done? */ 693 if (ISSET(st, stopst)) 694 matchp = p; 695 if (EQ(st, empty) || p == stop) 696 break; /* NOTE BREAK OUT */ 697 698 /* no, we must deal with this character */ 699 ASSIGN(tmp, st); 700 ASSIGN(st, empty); 701 st = step(m->g, startst, stopst, tmp, c, st); 702 SP("saft", st, c); 703 assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st)); 704 p++; 705 } 706 707 return(matchp); 708 } 709 710 711 /* 712 - expand - return set of states reachable from an initial set 713 */ 714 static states 715 expand(g, start, stop, st, atbol, ateol) 716 register struct re_guts *g; 717 int start; /* start state within strip */ 718 int stop; /* state after stop state within strip */ 719 register states st; 720 int atbol; /* at start or just after \n? (for BOL) */ 721 int ateol; /* just before \n or \0? (for EOL) */ 722 { 723 register sop s; 724 register sopno pc; 725 register onestate here; /* note, macros know this name */ 726 register sopno look; 727 register int i; 728 729 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 730 s = g->strip[pc]; 731 switch (OP(s)) { 732 case OEND: 733 assert(pc == stop-1); 734 break; 735 case OCHAR: /* can't get past this */ 736 break; 737 case OBOL: 738 if (atbol) 739 FWD(st, st, 1); 740 break; 741 case OEOL: 742 if (ateol) 743 FWD(st, st, 1); 744 break; 745 case OANY: /* can't get past this either */ 746 break; 747 case OANYOF: /* or this */ 748 break; 749 case OBACK_: /* not significant here */ 750 case O_BACK: 751 FWD(st, st, 1); 752 break; 753 case OPLUS_: /* forward, this is just an empty */ 754 FWD(st, st, 1); 755 break; 756 case O_PLUS: /* both forward and back */ 757 FWD(st, st, 1); 758 i = ISSETBACK(st, OPND(s)); 759 BACK(st, st, OPND(s)); 760 if (!i && ISSETBACK(st, OPND(s))) { 761 /* oho, must reconsider loop body */ 762 pc -= OPND(s) + 1; 763 INIT(here, pc); 764 } 765 break; 766 case OQUEST_: /* two branches, both forward */ 767 FWD(st, st, 1); 768 FWD(st, st, OPND(s)); 769 break; 770 case O_QUEST: /* just an empty */ 771 FWD(st, st, 1); 772 break; 773 case OLPAREN: /* not significant here */ 774 case ORPAREN: 775 FWD(st, st, 1); 776 break; 777 case OCH_: /* mark the first two branches */ 778 FWD(st, st, 1); 779 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 780 FWD(st, st, OPND(s)); 781 break; 782 case OOR1: /* done a branch, find the O_CH */ 783 if (ISSTATEIN(st, here)) { 784 for (look = 1; 785 OP(s = g->strip[pc+look]) != O_CH; 786 look += OPND(s)) 787 assert(OP(s) == OOR2); 788 FWD(st, st, look); 789 } 790 break; 791 case OOR2: /* propagate OCH_'s marking onward */ 792 FWD(st, st, 1); 793 if (OP(g->strip[pc+OPND(s)]) != O_CH) { 794 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 795 FWD(st, st, OPND(s)); 796 } 797 break; 798 case O_CH: /* just an empty */ 799 FWD(st, st, 1); 800 break; 801 default: /* ooooops... */ 802 assert(0); 803 break; 804 } 805 } 806 807 return(st); 808 } 809 810 /* 811 - step - map set of states reachable before char to set reachable after 812 */ 813 static states 814 step(g, start, stop, bef, ch, aft) 815 register struct re_guts *g; 816 int start; /* start state within strip */ 817 int stop; /* state after stop state within strip */ 818 register states bef; /* states reachable before */ 819 uchar ch; 820 register states aft; /* states already known reachable after */ 821 { 822 register cset *cs; 823 register sop s; 824 register sopno pc; 825 register onestate here; /* note, macros know this name */ 826 register sopno look; 827 register int i; 828 829 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 830 s = g->strip[pc]; 831 switch (OP(s)) { 832 case OEND: 833 assert(pc == stop-1); 834 break; 835 case OCHAR: 836 if (ch == OPND(s)) 837 FWD(aft, bef, 1); 838 break; 839 case OBOL: /* these are looked after by expand() */ 840 case OEOL: 841 break; 842 case OANY: 843 FWD(aft, bef, 1); 844 break; 845 case OANYOF: 846 cs = &g->sets[OPND(s)]; 847 if (CHIN(cs, ch)) 848 FWD(aft, bef, 1); 849 break; 850 case OBACK_: /* ignored here */ 851 case O_BACK: 852 FWD(aft, aft, 1); 853 break; 854 case OPLUS_: /* forward, this is just an empty */ 855 FWD(aft, aft, 1); 856 break; 857 case O_PLUS: /* both forward and back */ 858 FWD(aft, aft, 1); 859 i = ISSETBACK(aft, OPND(s)); 860 BACK(aft, aft, OPND(s)); 861 if (!i && ISSETBACK(aft, OPND(s))) { 862 /* oho, must reconsider loop body */ 863 pc -= OPND(s) + 1; 864 INIT(here, pc); 865 } 866 break; 867 case OQUEST_: /* two branches, both forward */ 868 FWD(aft, aft, 1); 869 FWD(aft, aft, OPND(s)); 870 break; 871 case O_QUEST: /* just an empty */ 872 FWD(aft, aft, 1); 873 break; 874 case OLPAREN: /* not significant here */ 875 case ORPAREN: 876 FWD(aft, aft, 1); 877 break; 878 case OCH_: /* mark the first two branches */ 879 FWD(aft, aft, 1); 880 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 881 FWD(aft, aft, OPND(s)); 882 break; 883 case OOR1: /* done a branch, find the O_CH */ 884 if (ISSTATEIN(aft, here)) { 885 for (look = 1; 886 OP(s = g->strip[pc+look]) != O_CH; 887 look += OPND(s)) 888 assert(OP(s) == OOR2); 889 FWD(aft, aft, look); 890 } 891 break; 892 case OOR2: /* propagate OCH_'s marking */ 893 FWD(aft, aft, 1); 894 if (OP(g->strip[pc+OPND(s)]) != O_CH) { 895 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 896 FWD(aft, aft, OPND(s)); 897 } 898 break; 899 case O_CH: /* just empty */ 900 FWD(aft, aft, 1); 901 break; 902 default: /* ooooops... */ 903 assert(0); 904 break; 905 } 906 } 907 908 return(aft); 909 } 910 911 #ifndef NDEBUG 912 /* 913 - print - print a set of states 914 */ 915 static void 916 print(g, caption, st, ch, d) 917 struct re_guts *g; 918 char *caption; 919 states st; 920 uchar ch; 921 FILE *d; 922 { 923 register int i; 924 register int first = 1; 925 926 fprintf(d, "%s", caption); 927 if (ch != '\0') 928 fprintf(d, " %s", regchar(ch)); 929 for (i = 0; i < g->nstates; i++) 930 if (ISSET(st, i)) { 931 fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 932 first = 0; 933 } 934 fprintf(d, "\n"); 935 } 936 #endif 937 938 #undef matcher 939 #undef fast 940 #undef slow 941 #undef dissect 942 #undef backref 943 #undef expand 944 #undef step 945 #undef print 946 #undef match 947