1 /*- 2 * Copyright (c) 1992 Henry Spencer. 3 * Copyright (c) 1992 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Henry Spencer of the University of Toronto. 8 * 9 * %sccs.include.redist.c% 10 * 11 * @(#)engine.c 5.3 (Berkeley) 09/30/92 12 */ 13 14 /* 15 * The matching engine and friends. This file is #included by regexec.c 16 * after suitable #defines of a variety of macros used herein, so that 17 * different state representations can be used without duplicating masses 18 * of code. 19 */ 20 21 #ifdef SNAMES 22 #define matcher smatcher 23 #define fast sfast 24 #define slow sslow 25 #define dissect sdissect 26 #define backref sbackref 27 #define expand sexpand 28 #define step sstep 29 #define print sprint 30 #define at sat 31 #define match smat 32 #endif 33 #ifdef LNAMES 34 #define matcher lmatcher 35 #define fast lfast 36 #define slow lslow 37 #define dissect ldissect 38 #define backref lbackref 39 #define expand lexpand 40 #define step lstep 41 #define print lprint 42 #define at lat 43 #define match lmat 44 #endif 45 46 /* another structure passed up and down to avoid zillions of parameters */ 47 struct match { 48 struct re_guts *g; 49 int eflags; 50 regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 51 uchar *offp; /* offsets work from here */ 52 uchar *beginp; /* start of string -- virtual NUL precedes */ 53 uchar *endp; /* end of string -- virtual NUL here */ 54 uchar *coldp; /* can be no match starting before here */ 55 uchar **lastpos; /* [nplus+1] */ 56 STATEVARS; 57 states st; /* current states */ 58 states fresh; /* states for a fresh start */ 59 states tmp; /* temporary */ 60 states empty; /* empty set of states */ 61 }; 62 63 #include "engine.ih" 64 65 #ifdef REDEBUG 66 #define SP(t, s, c) print(m, t, s, c, stdout) 67 #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 68 #define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 69 #else 70 #define SP(t, s, c) /* nothing */ 71 #define AT(t, p1, p2, s1, s2) /* nothing */ 72 #define NOTE(s) /* nothing */ 73 #endif 74 75 /* 76 - matcher - the actual matching engine 77 == static int matcher(register struct re_guts *g, uchar *string, \ 78 == size_t nmatch, regmatch_t pmatch[], int eflags); 79 */ 80 static int /* 0 success, REG_NOMATCH failure */ 81 matcher(g, string, nmatch, pmatch, eflags) 82 register struct re_guts *g; 83 uchar *string; 84 size_t nmatch; 85 regmatch_t pmatch[]; 86 int eflags; 87 { 88 register uchar *endp; 89 register int i; 90 struct match mv; 91 register struct match *m = &mv; 92 register uchar *dp; 93 const register sopno gf = g->firststate+1; /* +1 for OEND */ 94 const register sopno gl = g->laststate; 95 uchar *start; 96 uchar *stop; 97 98 /* simplify the situation where possible */ 99 if (g->cflags®_NOSUB) 100 nmatch = 0; 101 if (eflags®_STARTEND) { 102 start = string + pmatch[0].rm_so; 103 stop = string + pmatch[0].rm_eo; 104 } else { 105 start = string; 106 stop = start + strlen((char *)start); 107 } 108 109 /* prescreening; this does wonders for this rather slow code */ 110 if (g->must != NULL) { 111 for (dp = start; dp < stop; dp++) 112 if (*dp == (uchar)g->must[0] && stop - dp >= g->mlen && 113 strncmp((char *)dp, g->must, (size_t)g->mlen) == 0) 114 break; 115 if (dp == stop) /* we didn't find g->must */ 116 return(REG_NOMATCH); 117 } 118 119 /* match struct setup */ 120 m->g = g; 121 m->eflags = eflags; 122 m->pmatch = NULL; 123 m->lastpos = NULL; 124 m->offp = string; 125 m->beginp = start; 126 m->endp = stop; 127 STATESETUP(m, 4); 128 SETUP(m->st); 129 SETUP(m->fresh); 130 SETUP(m->tmp); 131 SETUP(m->empty); 132 CLEAR(m->empty); 133 134 /* this loop does only one repetition except for backrefs */ 135 for (;;) { 136 endp = fast(m, start, stop, gf, gl); 137 if (endp == NULL) { /* a miss */ 138 STATETEARDOWN(m); 139 return(REG_NOMATCH); 140 } 141 if (nmatch == 0 && !g->backrefs) 142 break; /* no further info needed */ 143 144 /* where? */ 145 assert(m->coldp != NULL); 146 for (;;) { 147 NOTE("finding start"); 148 endp = slow(m, m->coldp, stop, gf, gl); 149 if (endp != NULL) 150 break; 151 assert(m->coldp < m->endp); 152 m->coldp++; 153 } 154 if (nmatch == 1 && !g->backrefs) 155 break; /* no further info needed */ 156 157 /* oh my, he wants the subexpressions... */ 158 if (m->pmatch == NULL) 159 m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 160 sizeof(regmatch_t)); 161 if (m->pmatch == NULL) { 162 STATETEARDOWN(m); 163 return(REG_ESPACE); 164 } 165 for (i = 1; i <= m->g->nsub; i++) 166 m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 167 if (!g->backrefs && !(m->eflags®_BACKR)) { 168 NOTE("dissecting"); 169 dp = dissect(m, m->coldp, endp, gf, gl); 170 } else { 171 if (g->nplus > 0 && m->lastpos == NULL) 172 m->lastpos = (uchar **)malloc((g->nplus+1) * 173 sizeof(uchar *)); 174 if (g->nplus > 0 && m->lastpos == NULL) { 175 free(m->pmatch); 176 STATETEARDOWN(m); 177 return(REG_ESPACE); 178 } 179 NOTE("backref dissect"); 180 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 181 } 182 if (dp != NULL) 183 break; 184 185 /* uh-oh... we couldn't find a subexpression-level match */ 186 assert(g->backrefs); /* must be back references doing it */ 187 assert(g->nplus == 0 || m->lastpos != NULL); 188 for (;;) { 189 if (dp != NULL || endp <= m->coldp) 190 break; /* defeat */ 191 NOTE("backoff"); 192 endp = slow(m, m->coldp, endp-1, gf, gl); 193 if (endp == NULL) 194 break; /* defeat */ 195 /* try it on a shorter possibility */ 196 #ifndef NDEBUG 197 for (i = 1; i <= m->g->nsub; i++) { 198 assert(m->pmatch[i].rm_so == -1); 199 assert(m->pmatch[i].rm_eo == -1); 200 } 201 #endif 202 NOTE("backoff dissect"); 203 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 204 } 205 assert(dp == NULL || dp == endp); 206 if (dp != NULL) /* found a shorter one */ 207 break; 208 209 /* despite initial appearances, there is no match here */ 210 NOTE("false alarm"); 211 start = m->coldp + 1; /* recycle starting later */ 212 assert(start <= stop); 213 } 214 215 /* fill in the details if requested */ 216 if (nmatch > 0) { 217 pmatch[0].rm_so = m->coldp - m->offp; 218 pmatch[0].rm_eo = endp - m->offp; 219 } 220 if (nmatch > 1) { 221 assert(m->pmatch != NULL); 222 for (i = 1; i < nmatch; i++) 223 if (i <= m->g->nsub) 224 pmatch[i] = m->pmatch[i]; 225 else { 226 pmatch[i].rm_so = -1; 227 pmatch[i].rm_eo = -1; 228 } 229 } 230 231 if (m->pmatch != NULL) 232 free((char *)m->pmatch); 233 if (m->lastpos != NULL) 234 free((char *)m->lastpos); 235 STATETEARDOWN(m); 236 return(0); 237 } 238 239 /* 240 - dissect - figure out what matched what, no back references 241 == static uchar *dissect(register struct match *m, uchar *start, \ 242 == uchar *stop, sopno startst, sopno stopst); 243 */ 244 static uchar * /* == stop (success) always */ 245 dissect(m, start, stop, startst, stopst) 246 register struct match *m; 247 uchar *start; 248 uchar *stop; 249 sopno startst; 250 sopno stopst; 251 { 252 register int i; 253 register sopno ss; /* start sop of current subRE */ 254 register sopno es; /* end sop of current subRE */ 255 register uchar *sp; /* start of string matched by it */ 256 register uchar *stp; /* string matched by it cannot pass here */ 257 register uchar *rest; /* start of rest of string */ 258 register uchar *tail; /* string unmatched by rest of RE */ 259 register sopno ssub; /* start sop of subsubRE */ 260 register sopno esub; /* end sop of subsubRE */ 261 register uchar *ssp; /* start of string matched by subsubRE */ 262 register uchar *sep; /* end of string matched by subsubRE */ 263 register uchar *oldssp; /* previous ssp */ 264 register uchar *dp; 265 266 AT("diss", start, stop, startst, stopst); 267 sp = start; 268 for (ss = startst; ss < stopst; ss = es) { 269 /* identify end of subRE */ 270 es = ss; 271 switch (OP(m->g->strip[es])) { 272 case OPLUS_: 273 case OQUEST_: 274 es += OPND(m->g->strip[es]); 275 break; 276 case OCH_: 277 while (OP(m->g->strip[es]) != O_CH) 278 es += OPND(m->g->strip[es]); 279 break; 280 } 281 es++; 282 283 /* figure out what it matched */ 284 switch (OP(m->g->strip[ss])) { 285 case OEND: 286 assert(0); 287 break; 288 case OCHAR: 289 sp++; 290 break; 291 case OBOL: 292 case OEOL: 293 break; 294 case OANY: 295 case OANYOF: 296 sp++; 297 break; 298 case OBACK_: 299 case O_BACK: 300 assert(0); 301 break; 302 /* cases where length of match is hard to find */ 303 case OQUEST_: 304 stp = stop; 305 for (;;) { 306 /* how long could this one be? */ 307 rest = slow(m, sp, stp, ss, es); 308 assert(rest != NULL); /* it did match */ 309 /* could the rest match the rest? */ 310 tail = slow(m, rest, stop, es, stopst); 311 if (tail == stop) 312 break; /* yes! */ 313 /* no -- try a shorter match for this one */ 314 stp = rest - 1; 315 assert(stp >= sp); /* it did work */ 316 } 317 ssub = ss + 1; 318 esub = es - 1; 319 /* did innards match? */ 320 if (slow(m, sp, rest, ssub, esub) != NULL) { 321 dp = dissect(m, sp, rest, ssub, esub); 322 assert(dp == rest); 323 } else /* no */ 324 assert(sp == rest); 325 sp = rest; 326 break; 327 case OPLUS_: 328 stp = stop; 329 for (;;) { 330 /* how long could this one be? */ 331 rest = slow(m, sp, stp, ss, es); 332 assert(rest != NULL); /* it did match */ 333 /* could the rest match the rest? */ 334 tail = slow(m, rest, stop, es, stopst); 335 if (tail == stop) 336 break; /* yes! */ 337 /* no -- try a shorter match for this one */ 338 stp = rest - 1; 339 assert(stp >= sp); /* it did work */ 340 } 341 ssub = ss + 1; 342 esub = es - 1; 343 ssp = sp; 344 oldssp = ssp; 345 for (;;) { /* find last match of innards */ 346 sep = slow(m, ssp, rest, ssub, esub); 347 if (sep == NULL || sep == ssp) 348 break; /* failed or matched null */ 349 oldssp = ssp; /* on to next try */ 350 ssp = sep; 351 } 352 if (sep == NULL) { 353 /* last successful match */ 354 sep = ssp; 355 ssp = oldssp; 356 } 357 assert(sep == rest); /* must exhaust substring */ 358 assert(slow(m, ssp, sep, ssub, esub) == rest); 359 dp = dissect(m, ssp, sep, ssub, esub); 360 assert(dp == sep); 361 sp = rest; 362 break; 363 case OCH_: 364 stp = stop; 365 for (;;) { 366 /* how long could this one be? */ 367 rest = slow(m, sp, stp, ss, es); 368 assert(rest != NULL); /* it did match */ 369 /* could the rest match the rest? */ 370 tail = slow(m, rest, stop, es, stopst); 371 if (tail == stop) 372 break; /* yes! */ 373 /* no -- try a shorter match for this one */ 374 stp = rest - 1; 375 assert(stp >= sp); /* it did work */ 376 } 377 ssub = ss + 1; 378 esub = ss + OPND(m->g->strip[ss]) - 1; 379 assert(OP(m->g->strip[esub]) == OOR1); 380 for (;;) { /* find first matching branch */ 381 if (slow(m, sp, rest, ssub, esub) == rest) 382 break; /* it matched all of it */ 383 /* that one missed, try next one */ 384 assert(OP(m->g->strip[esub]) == OOR1); 385 esub++; 386 assert(OP(m->g->strip[esub]) == OOR2); 387 ssub = esub + 1; 388 esub += OPND(m->g->strip[esub]); 389 if (OP(m->g->strip[esub]) == OOR2) 390 esub--; 391 else 392 assert(OP(m->g->strip[esub]) == O_CH); 393 } 394 dp = dissect(m, sp, rest, ssub, esub); 395 assert(dp == rest); 396 sp = rest; 397 break; 398 case O_PLUS: 399 case O_QUEST: 400 case OOR1: 401 case OOR2: 402 case O_CH: 403 assert(0); 404 break; 405 case OLPAREN: 406 i = OPND(m->g->strip[ss]); 407 m->pmatch[i].rm_so = sp - m->offp; 408 break; 409 case ORPAREN: 410 i = OPND(m->g->strip[ss]); 411 m->pmatch[i].rm_eo = sp - m->offp; 412 break; 413 default: /* uh oh */ 414 assert(0); 415 break; 416 } 417 } 418 419 assert(sp == stop); 420 return(sp); 421 } 422 423 /* 424 - backref - figure out what matched what, figuring in back references 425 == static uchar *backref(register struct match *m, uchar *start, \ 426 == uchar *stop, sopno startst, sopno stopst, sopno lev); 427 */ 428 static uchar * /* == stop (success) or NULL (failure) */ 429 backref(m, start, stop, startst, stopst, lev) 430 register struct match *m; 431 uchar *start; 432 uchar *stop; 433 sopno startst; 434 sopno stopst; 435 sopno lev; /* PLUS nesting level */ 436 { 437 register int i; 438 register sopno ss; /* start sop of current subRE */ 439 register uchar *sp; /* start of string matched by it */ 440 register sopno ssub; /* start sop of subsubRE */ 441 register sopno esub; /* end sop of subsubRE */ 442 register uchar *ssp; /* start of string matched by subsubRE */ 443 register uchar *dp; 444 register size_t len; 445 register int hard; 446 register sop s; 447 register regoff_t offsave; 448 register cset *cs; 449 450 AT("back", start, stop, startst, stopst); 451 sp = start; 452 453 /* get as far as we can with easy stuff */ 454 hard = 0; 455 for (ss = startst; !hard && ss < stopst; ss++) 456 switch (OP(s = m->g->strip[ss])) { 457 case OCHAR: 458 if (sp == stop || *sp++ != OPND(s)) 459 return(NULL); 460 break; 461 case OANY: 462 if (sp == stop) 463 return(NULL); 464 sp++; 465 break; 466 case OANYOF: 467 cs = &m->g->sets[OPND(s)]; 468 if (sp == stop || !CHIN(cs, *sp++)) 469 return(NULL); 470 break; 471 case OBOL: 472 if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 473 (sp < m->endp && *(sp-1) == '\n' && 474 (m->g->cflags®_NEWLINE)) ) 475 { /* yes */ } 476 else 477 return(NULL); 478 break; 479 case OEOL: 480 if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 481 (sp < m->endp && *sp == '\n' && 482 (m->g->cflags®_NEWLINE)) ) 483 { /* yes */ } 484 else 485 return(NULL); 486 break; 487 case O_QUEST: 488 break; 489 case OOR1: /* matches null but needs to skip */ 490 ss++; 491 s = m->g->strip[ss]; 492 do { 493 assert(OP(s) == OOR2); 494 ss += OPND(s); 495 } while (OP(s = m->g->strip[ss]) != O_CH); 496 /* note that the ss++ gets us past the O_CH */ 497 break; 498 default: /* have to make a choice */ 499 hard = 1; 500 break; 501 } 502 if (!hard) { /* that was it! */ 503 if (sp != stop) 504 return(NULL); 505 return(sp); 506 } 507 ss--; /* adjust for the for's final increment */ 508 509 /* the hard stuff */ 510 AT("hard", sp, stop, ss, stopst); 511 s = m->g->strip[ss]; 512 switch (OP(s)) { 513 case OBACK_: /* the vilest depths */ 514 i = OPND(s); 515 if (m->pmatch[i].rm_eo == -1) 516 return(NULL); 517 assert(m->pmatch[i].rm_so != -1); 518 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 519 assert(stop - m->beginp >= len); 520 if (sp > stop - len) 521 return(NULL); /* not enough left to match */ 522 ssp = m->offp + m->pmatch[i].rm_so; 523 if (strncmp((char *)sp, (char *)ssp, len) != 0) 524 return(NULL); 525 while (m->g->strip[ss] != SOP(O_BACK, i)) 526 ss++; 527 return(backref(m, sp+len, stop, ss+1, stopst, lev)); 528 break; 529 case OQUEST_: /* to null or not */ 530 dp = backref(m, sp, stop, ss+1, stopst, lev); 531 if (dp != NULL) 532 return(dp); /* not */ 533 return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 534 break; 535 case OPLUS_: 536 assert(m->lastpos != NULL); 537 assert(lev+1 <= m->g->nplus); 538 m->lastpos[lev+1] = sp; 539 return(backref(m, sp, stop, ss+1, stopst, lev+1)); 540 break; 541 case O_PLUS: 542 if (sp == m->lastpos[lev]) /* last pass matched null */ 543 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 544 /* try another pass */ 545 m->lastpos[lev] = sp; 546 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 547 if (dp == NULL) 548 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 549 else 550 return(dp); 551 break; 552 case OCH_: /* find the right one, if any */ 553 ssub = ss + 1; 554 esub = ss + OPND(s) - 1; 555 assert(OP(m->g->strip[esub]) == OOR1); 556 for (;;) { /* find first matching branch */ 557 dp = backref(m, sp, stop, ssub, esub, lev); 558 if (dp != NULL) 559 return(dp); 560 /* that one missed, try next one */ 561 if (OP(m->g->strip[esub]) == O_CH) 562 return(NULL); /* there is none */ 563 esub++; 564 assert(OP(m->g->strip[esub]) == OOR2); 565 ssub = esub + 1; 566 esub += OPND(m->g->strip[esub]); 567 if (OP(m->g->strip[esub]) == OOR2) 568 esub--; 569 else 570 assert(OP(m->g->strip[esub]) == O_CH); 571 } 572 break; 573 case OLPAREN: /* must undo assignment if rest fails */ 574 i = OPND(s); 575 offsave = m->pmatch[i].rm_so; 576 m->pmatch[i].rm_so = sp - m->offp; 577 dp = backref(m, sp, stop, ss+1, stopst, lev); 578 if (dp != NULL) 579 return(dp); 580 m->pmatch[i].rm_so = offsave; 581 return(NULL); 582 break; 583 case ORPAREN: /* must undo assignment if rest fails */ 584 i = OPND(s); 585 offsave = m->pmatch[i].rm_eo; 586 m->pmatch[i].rm_eo = sp - m->offp; 587 dp = backref(m, sp, stop, ss+1, stopst, lev); 588 if (dp != NULL) 589 return(dp); 590 m->pmatch[i].rm_eo = offsave; 591 return(NULL); 592 break; 593 default: /* uh oh */ 594 assert(0); 595 break; 596 } 597 598 /* "can't happen" */ 599 assert(0); 600 /* NOTREACHED */ 601 } 602 603 /* 604 - fast - step through the string at top speed 605 == static uchar *fast(register struct match *m, uchar *start, \ 606 == uchar *stop, sopno startst, sopno stopst); 607 */ 608 static uchar * /* where tentative match ended, or NULL */ 609 fast(m, start, stop, startst, stopst) 610 register struct match *m; 611 uchar *start; 612 uchar *stop; 613 sopno startst; 614 sopno stopst; 615 { 616 register states st = m->st; 617 register states fresh = m->fresh; 618 register states tmp = m->tmp; 619 register uchar *p = start; 620 register uchar c = (start == m->beginp) ? '\0' : *(start-1); 621 register uchar lastc; /* previous c */ 622 register int atbol; 623 register int ateol; 624 register uchar *coldp; /* last p after which no match was underway */ 625 626 CLEAR(st); 627 SET1(st, startst); 628 st = expand(m->g, startst, stopst, st, 0, 0); 629 ASSIGN(fresh, st); 630 SP("start", st, *p); 631 coldp = NULL; 632 for (;;) { 633 /* next character */ 634 lastc = c; 635 c = (p == m->endp) ? '\0' : *p; 636 if (EQ(st, fresh)) 637 coldp = p; 638 639 /* is there an EOL and/or BOL between lastc and c? */ 640 atbol = ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 641 (lastc == '\0' && !(m->eflags®_NOTBOL)) ); 642 ateol = ( (c == '\n' && m->g->cflags®_NEWLINE) || 643 (c == '\0' && !(m->eflags®_NOTEOL)) ); 644 if (atbol || ateol) { 645 st = expand(m->g, startst, stopst, st, atbol, ateol); 646 SP("bef", st, c); 647 } 648 649 /* are we done? */ 650 if (ISSET(st, stopst) || p == stop) 651 break; /* NOTE BREAK OUT */ 652 653 /* no, we must deal with this character */ 654 ASSIGN(tmp, st); 655 ASSIGN(st, fresh); 656 st = step(m->g, startst, stopst, tmp, c, st); 657 SP("aft", st, c); 658 assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st)); 659 p++; 660 } 661 662 assert(coldp != NULL); 663 m->coldp = coldp; 664 if (ISSET(st, stopst)) 665 return(p+1); 666 else 667 return(NULL); 668 } 669 670 /* 671 - slow - step through the string more deliberately 672 == static uchar *slow(register struct match *m, uchar *start, \ 673 == uchar *stop, sopno startst, sopno stopst); 674 */ 675 static uchar * /* where it ended */ 676 slow(m, start, stop, startst, stopst) 677 register struct match *m; 678 uchar *start; 679 uchar *stop; 680 sopno startst; 681 sopno stopst; 682 { 683 register states st = m->st; 684 register states empty = m->empty; 685 register states tmp = m->tmp; 686 register uchar *p = start; 687 register uchar c = (start == m->beginp) ? '\0' : *(start-1); 688 register uchar lastc; /* previous c */ 689 register int atbol; 690 register int ateol; 691 register uchar *matchp; /* last p at which a match ended */ 692 693 AT("slow", start, stop, startst, stopst); 694 CLEAR(st); 695 SET1(st, startst); 696 SP("sstart", st, *p); 697 matchp = NULL; 698 for (;;) { 699 /* next character */ 700 lastc = c; 701 c = (p == m->endp) ? '\0' : *p; 702 703 /* is there an EOL and/or BOL between lastc and c? */ 704 atbol = ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 705 (lastc == '\0' && !(m->eflags®_NOTBOL)) ); 706 ateol = ( (c == '\n' && m->g->cflags®_NEWLINE) || 707 (c == '\0' && !(m->eflags®_NOTEOL)) ); 708 709 /* do we need an expansion before looking at the char? */ 710 if (p == start || atbol || ateol) { 711 st = expand(m->g, startst, stopst, st, atbol, ateol); 712 SP("sbef", st, c); 713 } 714 715 /* are we done? */ 716 if (ISSET(st, stopst)) 717 matchp = p; 718 if (EQ(st, empty) || p == stop) 719 break; /* NOTE BREAK OUT */ 720 721 /* no, we must deal with this character */ 722 ASSIGN(tmp, st); 723 ASSIGN(st, empty); 724 st = step(m->g, startst, stopst, tmp, c, st); 725 SP("saft", st, c); 726 assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st)); 727 p++; 728 } 729 730 return(matchp); 731 } 732 733 734 /* 735 - expand - return set of states reachable from an initial set 736 == static states expand(register struct re_guts *g, int start, \ 737 == int stop, register states st, int atbol, int ateol); 738 */ 739 static states 740 expand(g, start, stop, st, atbol, ateol) 741 register struct re_guts *g; 742 int start; /* start state within strip */ 743 int stop; /* state after stop state within strip */ 744 register states st; 745 int atbol; /* at start or just after \n? (for BOL) */ 746 int ateol; /* just before \n or \0? (for EOL) */ 747 { 748 register sop s; 749 register sopno pc; 750 register onestate here; /* note, macros know this name */ 751 register sopno look; 752 register int i; 753 754 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 755 s = g->strip[pc]; 756 switch (OP(s)) { 757 case OEND: 758 assert(pc == stop-1); 759 break; 760 case OCHAR: /* can't get past this */ 761 break; 762 case OBOL: 763 if (atbol) 764 FWD(st, st, 1); 765 break; 766 case OEOL: 767 if (ateol) 768 FWD(st, st, 1); 769 break; 770 case OANY: /* can't get past this either */ 771 break; 772 case OANYOF: /* or this */ 773 break; 774 case OBACK_: /* not significant here */ 775 case O_BACK: 776 FWD(st, st, 1); 777 break; 778 case OPLUS_: /* forward, this is just an empty */ 779 FWD(st, st, 1); 780 break; 781 case O_PLUS: /* both forward and back */ 782 FWD(st, st, 1); 783 i = ISSETBACK(st, OPND(s)); 784 BACK(st, st, OPND(s)); 785 if (!i && ISSETBACK(st, OPND(s))) { 786 /* oho, must reconsider loop body */ 787 pc -= OPND(s) + 1; 788 INIT(here, pc); 789 } 790 break; 791 case OQUEST_: /* two branches, both forward */ 792 FWD(st, st, 1); 793 FWD(st, st, OPND(s)); 794 break; 795 case O_QUEST: /* just an empty */ 796 FWD(st, st, 1); 797 break; 798 case OLPAREN: /* not significant here */ 799 case ORPAREN: 800 FWD(st, st, 1); 801 break; 802 case OCH_: /* mark the first two branches */ 803 FWD(st, st, 1); 804 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 805 FWD(st, st, OPND(s)); 806 break; 807 case OOR1: /* done a branch, find the O_CH */ 808 if (ISSTATEIN(st, here)) { 809 for (look = 1; 810 OP(s = g->strip[pc+look]) != O_CH; 811 look += OPND(s)) 812 assert(OP(s) == OOR2); 813 FWD(st, st, look); 814 } 815 break; 816 case OOR2: /* propagate OCH_'s marking onward */ 817 FWD(st, st, 1); 818 if (OP(g->strip[pc+OPND(s)]) != O_CH) { 819 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 820 FWD(st, st, OPND(s)); 821 } 822 break; 823 case O_CH: /* just an empty */ 824 FWD(st, st, 1); 825 break; 826 default: /* ooooops... */ 827 assert(0); 828 break; 829 } 830 } 831 832 return(st); 833 } 834 835 /* 836 - step - map set of states reachable before char to set reachable after 837 == static states step(register struct re_guts *g, int start, int stop, \ 838 == register states bef, uchar ch, register states aft); 839 */ 840 static states 841 step(g, start, stop, bef, ch, aft) 842 register struct re_guts *g; 843 int start; /* start state within strip */ 844 int stop; /* state after stop state within strip */ 845 register states bef; /* states reachable before */ 846 uchar ch; 847 register states aft; /* states already known reachable after */ 848 { 849 register cset *cs; 850 register sop s; 851 register sopno pc; 852 register onestate here; /* note, macros know this name */ 853 register sopno look; 854 register int i; 855 856 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 857 s = g->strip[pc]; 858 switch (OP(s)) { 859 case OEND: 860 assert(pc == stop-1); 861 break; 862 case OCHAR: 863 if (ch == OPND(s)) 864 FWD(aft, bef, 1); 865 break; 866 case OBOL: /* these are looked after by expand() */ 867 case OEOL: 868 break; 869 case OANY: 870 FWD(aft, bef, 1); 871 break; 872 case OANYOF: 873 cs = &g->sets[OPND(s)]; 874 if (CHIN(cs, ch)) 875 FWD(aft, bef, 1); 876 break; 877 case OBACK_: /* ignored here */ 878 case O_BACK: 879 FWD(aft, aft, 1); 880 break; 881 case OPLUS_: /* forward, this is just an empty */ 882 FWD(aft, aft, 1); 883 break; 884 case O_PLUS: /* both forward and back */ 885 FWD(aft, aft, 1); 886 i = ISSETBACK(aft, OPND(s)); 887 BACK(aft, aft, OPND(s)); 888 if (!i && ISSETBACK(aft, OPND(s))) { 889 /* oho, must reconsider loop body */ 890 pc -= OPND(s) + 1; 891 INIT(here, pc); 892 } 893 break; 894 case OQUEST_: /* two branches, both forward */ 895 FWD(aft, aft, 1); 896 FWD(aft, aft, OPND(s)); 897 break; 898 case O_QUEST: /* just an empty */ 899 FWD(aft, aft, 1); 900 break; 901 case OLPAREN: /* not significant here */ 902 case ORPAREN: 903 FWD(aft, aft, 1); 904 break; 905 case OCH_: /* mark the first two branches */ 906 FWD(aft, aft, 1); 907 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 908 FWD(aft, aft, OPND(s)); 909 break; 910 case OOR1: /* done a branch, find the O_CH */ 911 if (ISSTATEIN(aft, here)) { 912 for (look = 1; 913 OP(s = g->strip[pc+look]) != O_CH; 914 look += OPND(s)) 915 assert(OP(s) == OOR2); 916 FWD(aft, aft, look); 917 } 918 break; 919 case OOR2: /* propagate OCH_'s marking */ 920 FWD(aft, aft, 1); 921 if (OP(g->strip[pc+OPND(s)]) != O_CH) { 922 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 923 FWD(aft, aft, OPND(s)); 924 } 925 break; 926 case O_CH: /* just empty */ 927 FWD(aft, aft, 1); 928 break; 929 default: /* ooooops... */ 930 assert(0); 931 break; 932 } 933 } 934 935 return(aft); 936 } 937 938 #ifdef REDEBUG 939 /* 940 - print - print a set of states 941 == #ifdef REDEBUG 942 == static void print(struct match *m, char *caption, states st, \ 943 == uchar ch, FILE *d); 944 == #endif 945 */ 946 static void 947 print(m, caption, st, ch, d) 948 struct match *m; 949 char *caption; 950 states st; 951 uchar ch; 952 FILE *d; 953 { 954 register struct re_guts *g = m->g; 955 register int i; 956 register int first = 1; 957 958 if (!(m->eflags®_TRACE)) 959 return; 960 961 fprintf(d, "%s", caption); 962 if (ch != '\0') 963 fprintf(d, " %s", pchar(ch)); 964 for (i = 0; i < g->nstates; i++) 965 if (ISSET(st, i)) { 966 fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 967 first = 0; 968 } 969 fprintf(d, "\n"); 970 } 971 972 /* 973 - at - print current situation 974 == #ifdef REDEBUG 975 == static void at(struct match *m, char *title, uchar *start, uchar *stop, \ 976 == sopno startst, stopno stopst); 977 == #endif 978 */ 979 static void 980 at(m, title, start, stop, startst, stopst) 981 struct match *m; 982 char *title; 983 uchar *start; 984 uchar *stop; 985 sopno startst; 986 sopno stopst; 987 { 988 if (!(m->eflags®_TRACE)) 989 return; 990 991 printf("%s %s-", title, pchar(*start)); 992 printf("%s ", pchar(*stop)); 993 printf("%ld-%ld\n", (long)startst, (long)stopst); 994 } 995 996 #ifndef PCHARDONE 997 #define PCHARDONE /* never again */ 998 /* 999 - pchar - make a character printable 1000 == #ifdef REDEBUG 1001 == static char *pchar(int ch); 1002 == #endif 1003 * 1004 * Is this identical to regchar() over in debug.c? Well, yes. But a 1005 * duplicate here avoids having a debugging-capable regexec.o tied to 1006 * a matching debug.o, and this is convenient. It all disappears in 1007 * the non-debug compilation anyway, so it doesn't matter much. 1008 */ 1009 static char * /* -> representation */ 1010 pchar(ch) 1011 int ch; 1012 { 1013 static char pbuf[10]; 1014 1015 if (isprint(ch) || ch == ' ') 1016 sprintf(pbuf, "%c", ch); 1017 else 1018 sprintf(pbuf, "\\%o", ch); 1019 return(pbuf); 1020 } 1021 #endif 1022 #endif 1023 1024 #undef matcher 1025 #undef fast 1026 #undef slow 1027 #undef dissect 1028 #undef backref 1029 #undef expand 1030 #undef step 1031 #undef print 1032 #undef at 1033 #undef match 1034