1 /* $NetBSD: engine.c,v 1.7 1997/07/13 20:03:30 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1992, 1993, 1994 Henry Spencer. 5 * Copyright (c) 1992, 1993, 1994 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Henry Spencer. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)engine.c 8.5 (Berkeley) 3/20/94 40 */ 41 42 /* 43 * The matching engine and friends. This file is #included by regexec.c 44 * after suitable #defines of a variety of macros used herein, so that 45 * different state representations can be used without duplicating masses 46 * of code. 47 */ 48 49 #ifdef SNAMES 50 #define matcher smatcher 51 #define fast sfast 52 #define slow sslow 53 #define dissect sdissect 54 #define backref sbackref 55 #define step sstep 56 #define print sprint 57 #define at sat 58 #define match smat 59 #endif 60 #ifdef LNAMES 61 #define matcher lmatcher 62 #define fast lfast 63 #define slow lslow 64 #define dissect ldissect 65 #define backref lbackref 66 #define step lstep 67 #define print lprint 68 #define at lat 69 #define match lmat 70 #endif 71 72 /* another structure passed up and down to avoid zillions of parameters */ 73 struct match { 74 struct re_guts *g; 75 int eflags; 76 regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 77 char *offp; /* offsets work from here */ 78 char *beginp; /* start of string -- virtual NUL precedes */ 79 char *endp; /* end of string -- virtual NUL here */ 80 char *coldp; /* can be no match starting before here */ 81 char **lastpos; /* [nplus+1] */ 82 STATEVARS; 83 states st; /* current states */ 84 states fresh; /* states for a fresh start */ 85 states tmp; /* temporary */ 86 states empty; /* empty set of states */ 87 }; 88 89 /* ========= begin header generated by ./mkh ========= */ 90 #ifdef __cplusplus 91 extern "C" { 92 #endif 93 94 /* === engine.c === */ 95 static int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags)); 96 static char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 97 static char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev)); 98 static char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 99 static char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 100 static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft)); 101 #define BOL (OUT+1) 102 #define EOL (BOL+1) 103 #define BOLEOL (BOL+2) 104 #define NOTHING (BOL+3) 105 #define BOW (BOL+4) 106 #define EOW (BOL+5) 107 #define CODEMAX (BOL+5) /* highest code used */ 108 #define NONCHAR(c) ((c) > CHAR_MAX) 109 #define NNONCHAR (CODEMAX-CHAR_MAX) 110 #ifdef REDEBUG 111 static void print __P((struct match *m, char *caption, states st, int ch, FILE *d)); 112 #endif 113 #ifdef REDEBUG 114 static void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst)); 115 #endif 116 #ifdef REDEBUG 117 static char *pchar __P((int ch)); 118 #endif 119 120 #ifdef __cplusplus 121 } 122 #endif 123 /* ========= end header generated by ./mkh ========= */ 124 125 #ifdef REDEBUG 126 #define SP(t, s, c) print(m, t, s, c, stdout) 127 #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 128 #define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 129 #else 130 #define SP(t, s, c) /* nothing */ 131 #define AT(t, p1, p2, s1, s2) /* nothing */ 132 #define NOTE(s) /* nothing */ 133 #endif 134 135 /* 136 - matcher - the actual matching engine 137 == static int matcher(register struct re_guts *g, char *string, \ 138 == size_t nmatch, regmatch_t pmatch[], int eflags); 139 */ 140 static int /* 0 success, REG_NOMATCH failure */ 141 matcher(g, string, nmatch, pmatch, eflags) 142 register struct re_guts *g; 143 char *string; 144 size_t nmatch; 145 regmatch_t pmatch[]; 146 int eflags; 147 { 148 register char *endp; 149 register int i; 150 struct match mv; 151 register struct match *m = &mv; 152 register char *dp; 153 const register sopno gf = g->firststate+1; /* +1 for OEND */ 154 const register sopno gl = g->laststate; 155 char *start; 156 char *stop; 157 158 /* simplify the situation where possible */ 159 if (g->cflags®_NOSUB) 160 nmatch = 0; 161 if (eflags®_STARTEND) { 162 start = string + pmatch[0].rm_so; 163 stop = string + pmatch[0].rm_eo; 164 } else { 165 start = string; 166 stop = start + strlen(start); 167 } 168 if (stop < start) 169 return(REG_INVARG); 170 171 /* prescreening; this does wonders for this rather slow code */ 172 if (g->must != NULL) { 173 for (dp = start; dp < stop; dp++) 174 if (*dp == g->must[0] && stop - dp >= g->mlen && 175 memcmp(dp, g->must, (size_t)g->mlen) == 0) 176 break; 177 if (dp == stop) /* we didn't find g->must */ 178 return(REG_NOMATCH); 179 } 180 181 /* match struct setup */ 182 m->g = g; 183 m->eflags = eflags; 184 m->pmatch = NULL; 185 m->lastpos = NULL; 186 m->offp = string; 187 m->beginp = start; 188 m->endp = stop; 189 STATESETUP(m, 4); 190 SETUP(m->st); 191 SETUP(m->fresh); 192 SETUP(m->tmp); 193 SETUP(m->empty); 194 CLEAR(m->empty); 195 196 /* this loop does only one repetition except for backrefs */ 197 for (;;) { 198 endp = fast(m, start, stop, gf, gl); 199 if (endp == NULL) { /* a miss */ 200 STATETEARDOWN(m); 201 return(REG_NOMATCH); 202 } 203 if (nmatch == 0 && !g->backrefs) 204 break; /* no further info needed */ 205 206 /* where? */ 207 assert(m->coldp != NULL); 208 for (;;) { 209 NOTE("finding start"); 210 endp = slow(m, m->coldp, stop, gf, gl); 211 if (endp != NULL) 212 break; 213 assert(m->coldp < m->endp); 214 m->coldp++; 215 } 216 if (nmatch == 1 && !g->backrefs) 217 break; /* no further info needed */ 218 219 /* oh my, he wants the subexpressions... */ 220 if (m->pmatch == NULL) 221 m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 222 sizeof(regmatch_t)); 223 if (m->pmatch == NULL) { 224 STATETEARDOWN(m); 225 return(REG_ESPACE); 226 } 227 for (i = 1; i <= m->g->nsub; i++) 228 m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 229 if (!g->backrefs && !(m->eflags®_BACKR)) { 230 NOTE("dissecting"); 231 dp = dissect(m, m->coldp, endp, gf, gl); 232 } else { 233 if (g->nplus > 0 && m->lastpos == NULL) 234 m->lastpos = (char **)malloc((g->nplus+1) * 235 sizeof(char *)); 236 if (g->nplus > 0 && m->lastpos == NULL) { 237 free(m->pmatch); 238 STATETEARDOWN(m); 239 return(REG_ESPACE); 240 } 241 NOTE("backref dissect"); 242 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 243 } 244 if (dp != NULL) 245 break; 246 247 /* uh-oh... we couldn't find a subexpression-level match */ 248 assert(g->backrefs); /* must be back references doing it */ 249 assert(g->nplus == 0 || m->lastpos != NULL); 250 for (;;) { 251 if (dp != NULL || endp <= m->coldp) 252 break; /* defeat */ 253 NOTE("backoff"); 254 endp = slow(m, m->coldp, endp-1, gf, gl); 255 if (endp == NULL) 256 break; /* defeat */ 257 /* try it on a shorter possibility */ 258 #ifndef NDEBUG 259 for (i = 1; i <= m->g->nsub; i++) { 260 assert(m->pmatch[i].rm_so == -1); 261 assert(m->pmatch[i].rm_eo == -1); 262 } 263 #endif 264 NOTE("backoff dissect"); 265 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 266 } 267 assert(dp == NULL || dp == endp); 268 if (dp != NULL) /* found a shorter one */ 269 break; 270 271 /* despite initial appearances, there is no match here */ 272 NOTE("false alarm"); 273 start = m->coldp + 1; /* recycle starting later */ 274 assert(start <= stop); 275 } 276 277 /* fill in the details if requested */ 278 if (nmatch > 0) { 279 pmatch[0].rm_so = m->coldp - m->offp; 280 pmatch[0].rm_eo = endp - m->offp; 281 } 282 if (nmatch > 1) { 283 assert(m->pmatch != NULL); 284 for (i = 1; i < nmatch; i++) 285 if (i <= m->g->nsub) 286 pmatch[i] = m->pmatch[i]; 287 else { 288 pmatch[i].rm_so = -1; 289 pmatch[i].rm_eo = -1; 290 } 291 } 292 293 if (m->pmatch != NULL) 294 free((char *)m->pmatch); 295 if (m->lastpos != NULL) 296 free((char *)m->lastpos); 297 STATETEARDOWN(m); 298 return(0); 299 } 300 301 /* 302 - dissect - figure out what matched what, no back references 303 == static char *dissect(register struct match *m, char *start, \ 304 == char *stop, sopno startst, sopno stopst); 305 */ 306 static char * /* == stop (success) always */ 307 dissect(m, start, stop, startst, stopst) 308 register struct match *m; 309 char *start; 310 char *stop; 311 sopno startst; 312 sopno stopst; 313 { 314 register int i; 315 register sopno ss; /* start sop of current subRE */ 316 register sopno es; /* end sop of current subRE */ 317 register char *sp; /* start of string matched by it */ 318 register char *stp; /* string matched by it cannot pass here */ 319 register char *rest; /* start of rest of string */ 320 register char *tail; /* string unmatched by rest of RE */ 321 register sopno ssub; /* start sop of subsubRE */ 322 register sopno esub; /* end sop of subsubRE */ 323 register char *ssp; /* start of string matched by subsubRE */ 324 register char *sep; /* end of string matched by subsubRE */ 325 register char *oldssp; /* previous ssp */ 326 register char *dp; 327 328 AT("diss", start, stop, startst, stopst); 329 sp = start; 330 for (ss = startst; ss < stopst; ss = es) { 331 /* identify end of subRE */ 332 es = ss; 333 switch (OP(m->g->strip[es])) { 334 case OPLUS_: 335 case OQUEST_: 336 es += OPND(m->g->strip[es]); 337 break; 338 case OCH_: 339 while (OP(m->g->strip[es]) != O_CH) 340 es += OPND(m->g->strip[es]); 341 break; 342 } 343 es++; 344 345 /* figure out what it matched */ 346 switch (OP(m->g->strip[ss])) { 347 case OEND: 348 assert(nope); 349 break; 350 case OCHAR: 351 sp++; 352 break; 353 case OBOL: 354 case OEOL: 355 case OBOW: 356 case OEOW: 357 break; 358 case OANY: 359 case OANYOF: 360 sp++; 361 break; 362 case OBACK_: 363 case O_BACK: 364 assert(nope); 365 break; 366 /* cases where length of match is hard to find */ 367 case OQUEST_: 368 stp = stop; 369 for (;;) { 370 /* how long could this one be? */ 371 rest = slow(m, sp, stp, ss, es); 372 assert(rest != NULL); /* it did match */ 373 /* could the rest match the rest? */ 374 tail = slow(m, rest, stop, es, stopst); 375 if (tail == stop) 376 break; /* yes! */ 377 /* no -- try a shorter match for this one */ 378 stp = rest - 1; 379 assert(stp >= sp); /* it did work */ 380 } 381 ssub = ss + 1; 382 esub = es - 1; 383 /* did innards match? */ 384 if (slow(m, sp, rest, ssub, esub) != NULL) { 385 dp = dissect(m, sp, rest, ssub, esub); 386 assert(dp == rest); 387 } else /* no */ 388 assert(sp == rest); 389 sp = rest; 390 break; 391 case OPLUS_: 392 stp = stop; 393 for (;;) { 394 /* how long could this one be? */ 395 rest = slow(m, sp, stp, ss, es); 396 assert(rest != NULL); /* it did match */ 397 /* could the rest match the rest? */ 398 tail = slow(m, rest, stop, es, stopst); 399 if (tail == stop) 400 break; /* yes! */ 401 /* no -- try a shorter match for this one */ 402 stp = rest - 1; 403 assert(stp >= sp); /* it did work */ 404 } 405 ssub = ss + 1; 406 esub = es - 1; 407 ssp = sp; 408 oldssp = ssp; 409 for (;;) { /* find last match of innards */ 410 sep = slow(m, ssp, rest, ssub, esub); 411 if (sep == NULL || sep == ssp) 412 break; /* failed or matched null */ 413 oldssp = ssp; /* on to next try */ 414 ssp = sep; 415 } 416 if (sep == NULL) { 417 /* last successful match */ 418 sep = ssp; 419 ssp = oldssp; 420 } 421 assert(sep == rest); /* must exhaust substring */ 422 assert(slow(m, ssp, sep, ssub, esub) == rest); 423 dp = dissect(m, ssp, sep, ssub, esub); 424 assert(dp == sep); 425 sp = rest; 426 break; 427 case OCH_: 428 stp = stop; 429 for (;;) { 430 /* how long could this one be? */ 431 rest = slow(m, sp, stp, ss, es); 432 assert(rest != NULL); /* it did match */ 433 /* could the rest match the rest? */ 434 tail = slow(m, rest, stop, es, stopst); 435 if (tail == stop) 436 break; /* yes! */ 437 /* no -- try a shorter match for this one */ 438 stp = rest - 1; 439 assert(stp >= sp); /* it did work */ 440 } 441 ssub = ss + 1; 442 esub = ss + OPND(m->g->strip[ss]) - 1; 443 assert(OP(m->g->strip[esub]) == OOR1); 444 for (;;) { /* find first matching branch */ 445 if (slow(m, sp, rest, ssub, esub) == rest) 446 break; /* it matched all of it */ 447 /* that one missed, try next one */ 448 assert(OP(m->g->strip[esub]) == OOR1); 449 esub++; 450 assert(OP(m->g->strip[esub]) == OOR2); 451 ssub = esub + 1; 452 esub += OPND(m->g->strip[esub]); 453 if (OP(m->g->strip[esub]) == OOR2) 454 esub--; 455 else 456 assert(OP(m->g->strip[esub]) == O_CH); 457 } 458 dp = dissect(m, sp, rest, ssub, esub); 459 assert(dp == rest); 460 sp = rest; 461 break; 462 case O_PLUS: 463 case O_QUEST: 464 case OOR1: 465 case OOR2: 466 case O_CH: 467 assert(nope); 468 break; 469 case OLPAREN: 470 i = OPND(m->g->strip[ss]); 471 assert(0 < i && i <= m->g->nsub); 472 m->pmatch[i].rm_so = sp - m->offp; 473 break; 474 case ORPAREN: 475 i = OPND(m->g->strip[ss]); 476 assert(0 < i && i <= m->g->nsub); 477 m->pmatch[i].rm_eo = sp - m->offp; 478 break; 479 default: /* uh oh */ 480 assert(nope); 481 break; 482 } 483 } 484 485 assert(sp == stop); 486 return(sp); 487 } 488 489 /* 490 - backref - figure out what matched what, figuring in back references 491 == static char *backref(register struct match *m, char *start, \ 492 == char *stop, sopno startst, sopno stopst, sopno lev); 493 */ 494 static char * /* == stop (success) or NULL (failure) */ 495 backref(m, start, stop, startst, stopst, lev) 496 register struct match *m; 497 char *start; 498 char *stop; 499 sopno startst; 500 sopno stopst; 501 sopno lev; /* PLUS nesting level */ 502 { 503 register int i; 504 register sopno ss; /* start sop of current subRE */ 505 register char *sp; /* start of string matched by it */ 506 register sopno ssub; /* start sop of subsubRE */ 507 register sopno esub; /* end sop of subsubRE */ 508 register char *ssp; /* start of string matched by subsubRE */ 509 register char *dp; 510 register size_t len; 511 register int hard; 512 register sop s; 513 register regoff_t offsave; 514 register cset *cs; 515 516 AT("back", start, stop, startst, stopst); 517 sp = start; 518 519 /* get as far as we can with easy stuff */ 520 hard = 0; 521 for (ss = startst; !hard && ss < stopst; ss++) 522 switch (OP(s = m->g->strip[ss])) { 523 case OCHAR: 524 if (sp == stop || *sp++ != (char)OPND(s)) 525 return(NULL); 526 break; 527 case OANY: 528 if (sp == stop) 529 return(NULL); 530 sp++; 531 break; 532 case OANYOF: 533 cs = &m->g->sets[OPND(s)]; 534 if (sp == stop || !CHIN(cs, *sp++)) 535 return(NULL); 536 break; 537 case OBOL: 538 if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 539 (sp < m->endp && *(sp-1) == '\n' && 540 (m->g->cflags®_NEWLINE)) ) 541 { /* yes */ } 542 else 543 return(NULL); 544 break; 545 case OEOL: 546 if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 547 (sp < m->endp && *sp == '\n' && 548 (m->g->cflags®_NEWLINE)) ) 549 { /* yes */ } 550 else 551 return(NULL); 552 break; 553 case OBOW: 554 if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 555 (sp < m->endp && *(sp-1) == '\n' && 556 (m->g->cflags®_NEWLINE)) || 557 (sp > m->beginp && 558 !ISWORD(*(sp-1))) ) && 559 (sp < m->endp && ISWORD(*sp)) ) 560 { /* yes */ } 561 else 562 return(NULL); 563 break; 564 case OEOW: 565 if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || 566 (sp < m->endp && *sp == '\n' && 567 (m->g->cflags®_NEWLINE)) || 568 (sp < m->endp && !ISWORD(*sp)) ) && 569 (sp > m->beginp && ISWORD(*(sp-1))) ) 570 { /* yes */ } 571 else 572 return(NULL); 573 break; 574 case O_QUEST: 575 break; 576 case OOR1: /* matches null but needs to skip */ 577 ss++; 578 s = m->g->strip[ss]; 579 do { 580 assert(OP(s) == OOR2); 581 ss += OPND(s); 582 } while (OP(s = m->g->strip[ss]) != O_CH); 583 /* note that the ss++ gets us past the O_CH */ 584 break; 585 default: /* have to make a choice */ 586 hard = 1; 587 break; 588 } 589 if (!hard) { /* that was it! */ 590 if (sp != stop) 591 return(NULL); 592 return(sp); 593 } 594 ss--; /* adjust for the for's final increment */ 595 596 /* the hard stuff */ 597 AT("hard", sp, stop, ss, stopst); 598 s = m->g->strip[ss]; 599 switch (OP(s)) { 600 case OBACK_: /* the vilest depths */ 601 i = OPND(s); 602 assert(0 < i && i <= m->g->nsub); 603 if (m->pmatch[i].rm_eo == -1) 604 return(NULL); 605 assert(m->pmatch[i].rm_so != -1); 606 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 607 assert(stop - m->beginp >= len); 608 if (sp > stop - len) 609 return(NULL); /* not enough left to match */ 610 ssp = m->offp + m->pmatch[i].rm_so; 611 if (memcmp(sp, ssp, len) != 0) 612 return(NULL); 613 while (m->g->strip[ss] != SOP(O_BACK, i)) 614 ss++; 615 return(backref(m, sp+len, stop, ss+1, stopst, lev)); 616 break; 617 case OQUEST_: /* to null or not */ 618 dp = backref(m, sp, stop, ss+1, stopst, lev); 619 if (dp != NULL) 620 return(dp); /* not */ 621 return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 622 break; 623 case OPLUS_: 624 assert(m->lastpos != NULL); 625 assert(lev+1 <= m->g->nplus); 626 m->lastpos[lev+1] = sp; 627 return(backref(m, sp, stop, ss+1, stopst, lev+1)); 628 break; 629 case O_PLUS: 630 if (sp == m->lastpos[lev]) /* last pass matched null */ 631 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 632 /* try another pass */ 633 m->lastpos[lev] = sp; 634 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 635 if (dp == NULL) 636 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 637 else 638 return(dp); 639 break; 640 case OCH_: /* find the right one, if any */ 641 ssub = ss + 1; 642 esub = ss + OPND(s) - 1; 643 assert(OP(m->g->strip[esub]) == OOR1); 644 for (;;) { /* find first matching branch */ 645 dp = backref(m, sp, stop, ssub, esub, lev); 646 if (dp != NULL) 647 return(dp); 648 /* that one missed, try next one */ 649 if (OP(m->g->strip[esub]) == O_CH) 650 return(NULL); /* there is none */ 651 esub++; 652 assert(OP(m->g->strip[esub]) == OOR2); 653 ssub = esub + 1; 654 esub += OPND(m->g->strip[esub]); 655 if (OP(m->g->strip[esub]) == OOR2) 656 esub--; 657 else 658 assert(OP(m->g->strip[esub]) == O_CH); 659 } 660 break; 661 case OLPAREN: /* must undo assignment if rest fails */ 662 i = OPND(s); 663 assert(0 < i && i <= m->g->nsub); 664 offsave = m->pmatch[i].rm_so; 665 m->pmatch[i].rm_so = sp - m->offp; 666 dp = backref(m, sp, stop, ss+1, stopst, lev); 667 if (dp != NULL) 668 return(dp); 669 m->pmatch[i].rm_so = offsave; 670 return(NULL); 671 break; 672 case ORPAREN: /* must undo assignment if rest fails */ 673 i = OPND(s); 674 assert(0 < i && i <= m->g->nsub); 675 offsave = m->pmatch[i].rm_eo; 676 m->pmatch[i].rm_eo = sp - m->offp; 677 dp = backref(m, sp, stop, ss+1, stopst, lev); 678 if (dp != NULL) 679 return(dp); 680 m->pmatch[i].rm_eo = offsave; 681 return(NULL); 682 break; 683 default: /* uh oh */ 684 assert(nope); 685 break; 686 } 687 688 /* "can't happen" */ 689 assert(nope); 690 /* NOTREACHED */ 691 return NULL; 692 } 693 694 /* 695 - fast - step through the string at top speed 696 == static char *fast(register struct match *m, char *start, \ 697 == char *stop, sopno startst, sopno stopst); 698 */ 699 static char * /* where tentative match ended, or NULL */ 700 fast(m, start, stop, startst, stopst) 701 register struct match *m; 702 char *start; 703 char *stop; 704 sopno startst; 705 sopno stopst; 706 { 707 register states st = m->st; 708 register states fresh = m->fresh; 709 register states tmp = m->tmp; 710 register char *p = start; 711 register int c = (start == m->beginp) ? OUT : *(start-1); 712 register int lastc; /* previous c */ 713 register int flagch; 714 register int i; 715 register char *coldp; /* last p after which no match was underway */ 716 717 CLEAR(st); 718 SET1(st, startst); 719 st = step(m->g, startst, stopst, st, NOTHING, st); 720 ASSIGN(fresh, st); 721 SP("start", st, *p); 722 coldp = NULL; 723 for (;;) { 724 /* next character */ 725 lastc = c; 726 c = (p == m->endp) ? OUT : *p; 727 if (EQ(st, fresh)) 728 coldp = p; 729 730 /* is there an EOL and/or BOL between lastc and c? */ 731 flagch = '\0'; 732 i = 0; 733 if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 734 (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 735 flagch = BOL; 736 i = m->g->nbol; 737 } 738 if ( (c == '\n' && m->g->cflags®_NEWLINE) || 739 (c == OUT && !(m->eflags®_NOTEOL)) ) { 740 flagch = (flagch == BOL) ? BOLEOL : EOL; 741 i += m->g->neol; 742 } 743 if (i != 0) { 744 for (; i > 0; i--) 745 st = step(m->g, startst, stopst, st, flagch, st); 746 SP("boleol", st, c); 747 } 748 749 /* how about a word boundary? */ 750 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 751 (c != OUT && ISWORD(c)) ) { 752 flagch = BOW; 753 } 754 if ( (lastc != OUT && ISWORD(lastc)) && 755 (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 756 flagch = EOW; 757 } 758 if (flagch == BOW || flagch == EOW) { 759 st = step(m->g, startst, stopst, st, flagch, st); 760 SP("boweow", st, c); 761 } 762 763 /* are we done? */ 764 if (ISSET(st, stopst) || p == stop) 765 break; /* NOTE BREAK OUT */ 766 767 /* no, we must deal with this character */ 768 ASSIGN(tmp, st); 769 ASSIGN(st, fresh); 770 assert(c != OUT); 771 st = step(m->g, startst, stopst, tmp, c, st); 772 SP("aft", st, c); 773 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 774 p++; 775 } 776 777 assert(coldp != NULL); 778 m->coldp = coldp; 779 if (ISSET(st, stopst)) 780 return(p+1); 781 else 782 return(NULL); 783 } 784 785 /* 786 - slow - step through the string more deliberately 787 == static char *slow(register struct match *m, char *start, \ 788 == char *stop, sopno startst, sopno stopst); 789 */ 790 static char * /* where it ended */ 791 slow(m, start, stop, startst, stopst) 792 register struct match *m; 793 char *start; 794 char *stop; 795 sopno startst; 796 sopno stopst; 797 { 798 register states st = m->st; 799 register states empty = m->empty; 800 register states tmp = m->tmp; 801 register char *p = start; 802 register int c = (start == m->beginp) ? OUT : *(start-1); 803 register int lastc; /* previous c */ 804 register int flagch; 805 register int i; 806 register char *matchp; /* last p at which a match ended */ 807 808 AT("slow", start, stop, startst, stopst); 809 CLEAR(st); 810 SET1(st, startst); 811 SP("sstart", st, *p); 812 st = step(m->g, startst, stopst, st, NOTHING, st); 813 matchp = NULL; 814 for (;;) { 815 /* next character */ 816 lastc = c; 817 c = (p == m->endp) ? OUT : *p; 818 819 /* is there an EOL and/or BOL between lastc and c? */ 820 flagch = '\0'; 821 i = 0; 822 if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 823 (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 824 flagch = BOL; 825 i = m->g->nbol; 826 } 827 if ( (c == '\n' && m->g->cflags®_NEWLINE) || 828 (c == OUT && !(m->eflags®_NOTEOL)) ) { 829 flagch = (flagch == BOL) ? BOLEOL : EOL; 830 i += m->g->neol; 831 } 832 if (i != 0) { 833 for (; i > 0; i--) 834 st = step(m->g, startst, stopst, st, flagch, st); 835 SP("sboleol", st, c); 836 } 837 838 /* how about a word boundary? */ 839 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 840 (c != OUT && ISWORD(c)) ) { 841 flagch = BOW; 842 } 843 if ( (lastc != OUT && ISWORD(lastc)) && 844 (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 845 flagch = EOW; 846 } 847 if (flagch == BOW || flagch == EOW) { 848 st = step(m->g, startst, stopst, st, flagch, st); 849 SP("sboweow", st, c); 850 } 851 852 /* are we done? */ 853 if (ISSET(st, stopst)) 854 matchp = p; 855 if (EQ(st, empty) || p == stop) 856 break; /* NOTE BREAK OUT */ 857 858 /* no, we must deal with this character */ 859 ASSIGN(tmp, st); 860 ASSIGN(st, empty); 861 assert(c != OUT); 862 st = step(m->g, startst, stopst, tmp, c, st); 863 SP("saft", st, c); 864 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 865 p++; 866 } 867 868 return(matchp); 869 } 870 871 872 /* 873 - step - map set of states reachable before char to set reachable after 874 == static states step(register struct re_guts *g, sopno start, sopno stop, \ 875 == register states bef, int ch, register states aft); 876 == #define BOL (OUT+1) 877 == #define EOL (BOL+1) 878 == #define BOLEOL (BOL+2) 879 == #define NOTHING (BOL+3) 880 == #define BOW (BOL+4) 881 == #define EOW (BOL+5) 882 == #define CODEMAX (BOL+5) // highest code used 883 == #define NONCHAR(c) ((c) > CHAR_MAX) 884 == #define NNONCHAR (CODEMAX-CHAR_MAX) 885 */ 886 static states 887 step(g, start, stop, bef, ch, aft) 888 register struct re_guts *g; 889 sopno start; /* start state within strip */ 890 sopno stop; /* state after stop state within strip */ 891 register states bef; /* states reachable before */ 892 int ch; /* character or NONCHAR code */ 893 register states aft; /* states already known reachable after */ 894 { 895 register cset *cs; 896 register sop s; 897 register sopno pc; 898 register onestate here; /* note, macros know this name */ 899 register sopno look; 900 register int i; 901 902 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 903 s = g->strip[pc]; 904 switch (OP(s)) { 905 case OEND: 906 assert(pc == stop-1); 907 break; 908 case OCHAR: 909 /* only characters can match */ 910 assert(!NONCHAR(ch) || ch != (char)OPND(s)); 911 if (ch == (char)OPND(s)) 912 FWD(aft, bef, 1); 913 break; 914 case OBOL: 915 if (ch == BOL || ch == BOLEOL) 916 FWD(aft, bef, 1); 917 break; 918 case OEOL: 919 if (ch == EOL || ch == BOLEOL) 920 FWD(aft, bef, 1); 921 break; 922 case OBOW: 923 if (ch == BOW) 924 FWD(aft, bef, 1); 925 break; 926 case OEOW: 927 if (ch == EOW) 928 FWD(aft, bef, 1); 929 break; 930 case OANY: 931 if (!NONCHAR(ch)) 932 FWD(aft, bef, 1); 933 break; 934 case OANYOF: 935 cs = &g->sets[OPND(s)]; 936 if (!NONCHAR(ch) && CHIN(cs, ch)) 937 FWD(aft, bef, 1); 938 break; 939 case OBACK_: /* ignored here */ 940 case O_BACK: 941 FWD(aft, aft, 1); 942 break; 943 case OPLUS_: /* forward, this is just an empty */ 944 FWD(aft, aft, 1); 945 break; 946 case O_PLUS: /* both forward and back */ 947 FWD(aft, aft, 1); 948 i = ISSETBACK(aft, OPND(s)); 949 BACK(aft, aft, OPND(s)); 950 if (!i && ISSETBACK(aft, OPND(s))) { 951 /* oho, must reconsider loop body */ 952 pc -= OPND(s) + 1; 953 INIT(here, pc); 954 } 955 break; 956 case OQUEST_: /* two branches, both forward */ 957 FWD(aft, aft, 1); 958 FWD(aft, aft, OPND(s)); 959 break; 960 case O_QUEST: /* just an empty */ 961 FWD(aft, aft, 1); 962 break; 963 case OLPAREN: /* not significant here */ 964 case ORPAREN: 965 FWD(aft, aft, 1); 966 break; 967 case OCH_: /* mark the first two branches */ 968 FWD(aft, aft, 1); 969 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 970 FWD(aft, aft, OPND(s)); 971 break; 972 case OOR1: /* done a branch, find the O_CH */ 973 if (ISSTATEIN(aft, here)) { 974 for (look = 1; 975 OP(s = g->strip[pc+look]) != O_CH; 976 look += OPND(s)) 977 assert(OP(s) == OOR2); 978 FWD(aft, aft, look); 979 } 980 break; 981 case OOR2: /* propagate OCH_'s marking */ 982 FWD(aft, aft, 1); 983 if (OP(g->strip[pc+OPND(s)]) != O_CH) { 984 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 985 FWD(aft, aft, OPND(s)); 986 } 987 break; 988 case O_CH: /* just empty */ 989 FWD(aft, aft, 1); 990 break; 991 default: /* ooooops... */ 992 assert(nope); 993 break; 994 } 995 } 996 997 return(aft); 998 } 999 1000 #ifdef REDEBUG 1001 /* 1002 - print - print a set of states 1003 == #ifdef REDEBUG 1004 == static void print(struct match *m, char *caption, states st, \ 1005 == int ch, FILE *d); 1006 == #endif 1007 */ 1008 static void 1009 print(m, caption, st, ch, d) 1010 struct match *m; 1011 char *caption; 1012 states st; 1013 int ch; 1014 FILE *d; 1015 { 1016 register struct re_guts *g = m->g; 1017 register int i; 1018 register int first = 1; 1019 1020 if (!(m->eflags®_TRACE)) 1021 return; 1022 1023 fprintf(d, "%s", caption); 1024 if (ch != '\0') 1025 fprintf(d, " %s", pchar(ch)); 1026 for (i = 0; i < g->nstates; i++) 1027 if (ISSET(st, i)) { 1028 fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 1029 first = 0; 1030 } 1031 fprintf(d, "\n"); 1032 } 1033 1034 /* 1035 - at - print current situation 1036 == #ifdef REDEBUG 1037 == static void at(struct match *m, char *title, char *start, char *stop, \ 1038 == sopno startst, sopno stopst); 1039 == #endif 1040 */ 1041 static void 1042 at(m, title, start, stop, startst, stopst) 1043 struct match *m; 1044 char *title; 1045 char *start; 1046 char *stop; 1047 sopno startst; 1048 sopno stopst; 1049 { 1050 if (!(m->eflags®_TRACE)) 1051 return; 1052 1053 printf("%s %s-", title, pchar(*start)); 1054 printf("%s ", pchar(*stop)); 1055 printf("%ld-%ld\n", (long)startst, (long)stopst); 1056 } 1057 1058 #ifndef PCHARDONE 1059 #define PCHARDONE /* never again */ 1060 /* 1061 - pchar - make a character printable 1062 == #ifdef REDEBUG 1063 == static char *pchar(int ch); 1064 == #endif 1065 * 1066 * Is this identical to regchar() over in debug.c? Well, yes. But a 1067 * duplicate here avoids having a debugging-capable regexec.o tied to 1068 * a matching debug.o, and this is convenient. It all disappears in 1069 * the non-debug compilation anyway, so it doesn't matter much. 1070 */ 1071 static char * /* -> representation */ 1072 pchar(ch) 1073 int ch; 1074 { 1075 static char pbuf[10]; 1076 1077 if (isprint(ch) || ch == ' ') 1078 (void)snprintf(pbuf, sizeof pbuf, "%c", ch); 1079 else 1080 (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch); 1081 return(pbuf); 1082 } 1083 #endif 1084 #endif 1085 1086 #undef matcher 1087 #undef fast 1088 #undef slow 1089 #undef dissect 1090 #undef backref 1091 #undef step 1092 #undef print 1093 #undef at 1094 #undef match 1095