1 /* $OpenBSD: b.c,v 1.14 2007/09/02 15:19:31 deraadt Exp $ */ 2 /**************************************************************** 3 Copyright (C) Lucent Technologies 1997 4 All Rights Reserved 5 6 Permission to use, copy, modify, and distribute this software and 7 its documentation for any purpose and without fee is hereby 8 granted, provided that the above copyright notice appear in all 9 copies and that both that the copyright notice and this 10 permission notice and warranty disclaimer appear in supporting 11 documentation, and that the name Lucent Technologies or any of 12 its entities not be used in advertising or publicity pertaining 13 to distribution of the software without specific, written prior 14 permission. 15 16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 23 THIS SOFTWARE. 24 ****************************************************************/ 25 26 /* lasciate ogne speranza, voi ch'entrate. */ 27 28 #define DEBUG 29 30 #include <ctype.h> 31 #include <stdio.h> 32 #include <string.h> 33 #include <stdlib.h> 34 #include "awk.h" 35 #include "ytab.h" 36 37 #define HAT (NCHARS+2) /* matches ^ in regular expr */ 38 /* NCHARS is 2**n */ 39 #define MAXLIN 22 40 41 #define type(v) (v)->nobj /* badly overloaded here */ 42 #define info(v) (v)->ntype /* badly overloaded here */ 43 #define left(v) (v)->narg[0] 44 #define right(v) (v)->narg[1] 45 #define parent(v) (v)->nnext 46 47 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: 48 #define UNARY case STAR: case PLUS: case QUEST: 49 50 /* encoding in tree Nodes: 51 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): 52 left is index, right contains value or pointer to value 53 unary (STAR, PLUS, QUEST): left is child, right is null 54 binary (CAT, OR): left and right are children 55 parent contains pointer to parent 56 */ 57 58 59 int *setvec; 60 int *tmpset; 61 int maxsetvec = 0; 62 63 int rtok; /* next token in current re */ 64 int rlxval; 65 static uschar *rlxstr; 66 static uschar *prestr; /* current position in current re */ 67 static uschar *lastre; /* origin of last re */ 68 69 static int setcnt; 70 static int poscnt; 71 72 char *patbeg; 73 int patlen; 74 75 #define NFA 20 /* cache this many dynamic fa's */ 76 fa *fatab[NFA]; 77 int nfatab = 0; /* entries in fatab */ 78 79 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ 80 { 81 int i, use, nuse; 82 fa *pfa; 83 static int now = 1; 84 85 if (setvec == 0) { /* first time through any RE */ 86 maxsetvec = MAXLIN; 87 setvec = (int *) calloc(maxsetvec, sizeof(int)); 88 tmpset = (int *) calloc(maxsetvec, sizeof(int)); 89 if (setvec == 0 || tmpset == 0) 90 overflo("out of space initializing makedfa"); 91 } 92 93 if (compile_time) /* a constant for sure */ 94 return mkdfa(s, anchor); 95 for (i = 0; i < nfatab; i++) /* is it there already? */ 96 if (fatab[i]->anchor == anchor 97 && strcmp((const char *) fatab[i]->restr, s) == 0) { 98 fatab[i]->use = now++; 99 return fatab[i]; 100 } 101 pfa = mkdfa(s, anchor); 102 if (nfatab < NFA) { /* room for another */ 103 fatab[nfatab] = pfa; 104 fatab[nfatab]->use = now++; 105 nfatab++; 106 return pfa; 107 } 108 use = fatab[0]->use; /* replace least-recently used */ 109 nuse = 0; 110 for (i = 1; i < nfatab; i++) 111 if (fatab[i]->use < use) { 112 use = fatab[i]->use; 113 nuse = i; 114 } 115 freefa(fatab[nuse]); 116 fatab[nuse] = pfa; 117 pfa->use = now++; 118 return pfa; 119 } 120 121 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ 122 /* anchor = 1 for anchored matches, else 0 */ 123 { 124 Node *p, *p1; 125 fa *f; 126 127 p = reparse(s); 128 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); 129 /* put ALL STAR in front of reg. exp. */ 130 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); 131 /* put FINAL after reg. exp. */ 132 133 poscnt = 0; 134 penter(p1); /* enter parent pointers and leaf indices */ 135 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) 136 overflo("out of space for fa"); 137 f->accept = poscnt-1; /* penter has computed number of positions in re */ 138 cfoll(f, p1); /* set up follow sets */ 139 freetr(p1); 140 if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL) 141 overflo("out of space in makedfa"); 142 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) 143 overflo("out of space in makedfa"); 144 *f->posns[1] = 0; 145 f->initstat = makeinit(f, anchor); 146 f->anchor = anchor; 147 f->restr = (uschar *) tostring(s); 148 return f; 149 } 150 151 int makeinit(fa *f, int anchor) 152 { 153 int i, k; 154 155 f->curstat = 2; 156 f->out[2] = 0; 157 f->reset = 0; 158 k = *(f->re[0].lfollow); 159 xfree(f->posns[2]); 160 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) 161 overflo("out of space in makeinit"); 162 for (i=0; i <= k; i++) { 163 (f->posns[2])[i] = (f->re[0].lfollow)[i]; 164 } 165 if ((f->posns[2])[1] == f->accept) 166 f->out[2] = 1; 167 for (i=0; i < NCHARS; i++) 168 f->gototab[2][i] = 0; 169 f->curstat = cgoto(f, 2, HAT); 170 if (anchor) { 171 *f->posns[2] = k-1; /* leave out position 0 */ 172 for (i=0; i < k; i++) { 173 (f->posns[0])[i] = (f->posns[2])[i]; 174 } 175 176 f->out[0] = f->out[2]; 177 if (f->curstat != 2) 178 --(*f->posns[f->curstat]); 179 } 180 return f->curstat; 181 } 182 183 void penter(Node *p) /* set up parent pointers and leaf indices */ 184 { 185 switch (type(p)) { 186 LEAF 187 info(p) = poscnt; 188 poscnt++; 189 break; 190 UNARY 191 penter(left(p)); 192 parent(left(p)) = p; 193 break; 194 case CAT: 195 case OR: 196 penter(left(p)); 197 penter(right(p)); 198 parent(left(p)) = p; 199 parent(right(p)) = p; 200 break; 201 default: /* can't happen */ 202 FATAL("can't happen: unknown type %d in penter", type(p)); 203 break; 204 } 205 } 206 207 void freetr(Node *p) /* free parse tree */ 208 { 209 switch (type(p)) { 210 LEAF 211 xfree(p); 212 break; 213 UNARY 214 freetr(left(p)); 215 xfree(p); 216 break; 217 case CAT: 218 case OR: 219 freetr(left(p)); 220 freetr(right(p)); 221 xfree(p); 222 break; 223 default: /* can't happen */ 224 FATAL("can't happen: unknown type %d in freetr", type(p)); 225 break; 226 } 227 } 228 229 /* in the parsing of regular expressions, metacharacters like . have */ 230 /* to be seen literally; \056 is not a metacharacter. */ 231 232 int hexstr(char **pp) /* find and eval hex string at pp, return new p */ 233 { /* only pick up one 8-bit byte (2 chars) */ 234 uschar *p; 235 int n = 0; 236 int i; 237 238 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { 239 if (isdigit(*p)) 240 n = 16 * n + *p - '0'; 241 else if (*p >= 'a' && *p <= 'f') 242 n = 16 * n + *p - 'a' + 10; 243 else if (*p >= 'A' && *p <= 'F') 244 n = 16 * n + *p - 'A' + 10; 245 } 246 *pp = (char *) p; 247 return n; 248 } 249 250 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ 251 252 int quoted(char **pp) /* pick up next thing after a \\ */ 253 /* and increment *pp */ 254 { 255 char *p = *pp; 256 int c; 257 258 if ((c = *p++) == 't') 259 c = '\t'; 260 else if (c == 'n') 261 c = '\n'; 262 else if (c == 'f') 263 c = '\f'; 264 else if (c == 'r') 265 c = '\r'; 266 else if (c == 'b') 267 c = '\b'; 268 else if (c == '\\') 269 c = '\\'; 270 else if (c == 'x') { /* hexadecimal goo follows */ 271 c = hexstr(&p); /* this adds a null if number is invalid */ 272 } else if (isoctdigit(c)) { /* \d \dd \ddd */ 273 int n = c - '0'; 274 if (isoctdigit(*p)) { 275 n = 8 * n + *p++ - '0'; 276 if (isoctdigit(*p)) 277 n = 8 * n + *p++ - '0'; 278 } 279 c = n; 280 } /* else */ 281 /* c = c; */ 282 *pp = p; 283 return c; 284 } 285 286 char *cclenter(const char *argp) /* add a character class */ 287 { 288 int i, c, c2; 289 uschar *p = (uschar *) argp; 290 uschar *op, *bp; 291 static uschar *buf = 0; 292 static int bufsz = 100; 293 294 op = p; 295 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 296 FATAL("out of space for character class [%.10s...] 1", p); 297 bp = buf; 298 for (i = 0; (c = *p++) != 0; ) { 299 if (c == '\\') { 300 c = quoted((char **) &p); 301 } else if (c == '-' && i > 0 && bp[-1] != 0) { 302 if (*p != 0) { 303 c = bp[-1]; 304 c2 = *p++; 305 if (c2 == '\\') 306 c2 = quoted((char **) &p); 307 if (c > c2) { /* empty; ignore */ 308 bp--; 309 i--; 310 continue; 311 } 312 while (c < c2) { 313 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) 314 FATAL("out of space for character class [%.10s...] 2", p); 315 *bp++ = ++c; 316 i++; 317 } 318 continue; 319 } 320 } 321 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) 322 FATAL("out of space for character class [%.10s...] 3", p); 323 *bp++ = c; 324 i++; 325 } 326 *bp = 0; 327 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); 328 xfree(op); 329 return (char *) tostring((char *) buf); 330 } 331 332 void overflo(const char *s) 333 { 334 FATAL("regular expression too big: %.30s...", s); 335 } 336 337 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ 338 { 339 int i; 340 int *p; 341 342 switch (type(v)) { 343 LEAF 344 f->re[info(v)].ltype = type(v); 345 f->re[info(v)].lval.np = right(v); 346 while (f->accept >= maxsetvec) { /* guessing here! */ 347 maxsetvec *= 4; 348 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 349 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 350 if (setvec == 0 || tmpset == 0) 351 overflo("out of space in cfoll()"); 352 } 353 for (i = 0; i <= f->accept; i++) 354 setvec[i] = 0; 355 setcnt = 0; 356 follow(v); /* computes setvec and setcnt */ 357 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) 358 overflo("out of space building follow set"); 359 f->re[info(v)].lfollow = p; 360 *p = setcnt; 361 for (i = f->accept; i >= 0; i--) 362 if (setvec[i] == 1) 363 *++p = i; 364 break; 365 UNARY 366 cfoll(f,left(v)); 367 break; 368 case CAT: 369 case OR: 370 cfoll(f,left(v)); 371 cfoll(f,right(v)); 372 break; 373 default: /* can't happen */ 374 FATAL("can't happen: unknown type %d in cfoll", type(v)); 375 } 376 } 377 378 int first(Node *p) /* collects initially active leaves of p into setvec */ 379 /* returns 1 if p matches empty string */ 380 { 381 int b, lp; 382 383 switch (type(p)) { 384 LEAF 385 lp = info(p); /* look for high-water mark of subscripts */ 386 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ 387 maxsetvec *= 4; 388 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 389 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 390 if (setvec == 0 || tmpset == 0) 391 overflo("out of space in first()"); 392 } 393 if (setvec[lp] != 1) { 394 setvec[lp] = 1; 395 setcnt++; 396 } 397 if (type(p) == CCL && (*(char *) right(p)) == '\0') 398 return(0); /* empty CCL */ 399 else return(1); 400 case PLUS: 401 if (first(left(p)) == 0) return(0); 402 return(1); 403 case STAR: 404 case QUEST: 405 first(left(p)); 406 return(0); 407 case CAT: 408 if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 409 return(1); 410 case OR: 411 b = first(right(p)); 412 if (first(left(p)) == 0 || b == 0) return(0); 413 return(1); 414 } 415 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ 416 return(-1); 417 } 418 419 void follow(Node *v) /* collects leaves that can follow v into setvec */ 420 { 421 Node *p; 422 423 if (type(v) == FINAL) 424 return; 425 p = parent(v); 426 switch (type(p)) { 427 case STAR: 428 case PLUS: 429 first(v); 430 follow(p); 431 return; 432 433 case OR: 434 case QUEST: 435 follow(p); 436 return; 437 438 case CAT: 439 if (v == left(p)) { /* v is left child of p */ 440 if (first(right(p)) == 0) { 441 follow(p); 442 return; 443 } 444 } else /* v is right child */ 445 follow(p); 446 return; 447 } 448 } 449 450 int member(int c, const char *sarg) /* is c in s? */ 451 { 452 uschar *s = (uschar *) sarg; 453 454 while (*s) 455 if (c == *s++) 456 return(1); 457 return(0); 458 } 459 460 int match(fa *f, const char *p0) /* shortest match ? */ 461 { 462 int s, ns; 463 uschar *p = (uschar *) p0; 464 465 s = f->reset ? makeinit(f,0) : f->initstat; 466 if (f->out[s]) 467 return(1); 468 do { 469 assert(*p < NCHARS); 470 if ((ns = f->gototab[s][*p]) != 0) 471 s = ns; 472 else 473 s = cgoto(f, s, *p); 474 if (f->out[s]) 475 return(1); 476 } while (*p++ != 0); 477 return(0); 478 } 479 480 int pmatch(fa *f, const char *p0) /* longest match, for sub */ 481 { 482 int s, ns; 483 uschar *p = (uschar *) p0; 484 uschar *q; 485 int i, k; 486 487 /* s = f->reset ? makeinit(f,1) : f->initstat; */ 488 if (f->reset) { 489 f->initstat = s = makeinit(f,1); 490 } else { 491 s = f->initstat; 492 } 493 patbeg = (char *) p; 494 patlen = -1; 495 do { 496 q = p; 497 do { 498 if (f->out[s]) /* final state */ 499 patlen = q-p; 500 assert(*q < NCHARS); 501 if ((ns = f->gototab[s][*q]) != 0) 502 s = ns; 503 else 504 s = cgoto(f, s, *q); 505 if (s == 1) { /* no transition */ 506 if (patlen >= 0) { 507 patbeg = (char *) p; 508 return(1); 509 } 510 else 511 goto nextin; /* no match */ 512 } 513 } while (*q++ != 0); 514 if (f->out[s]) 515 patlen = q-p-1; /* don't count $ */ 516 if (patlen >= 0) { 517 patbeg = (char *) p; 518 return(1); 519 } 520 nextin: 521 s = 2; 522 if (f->reset) { 523 for (i = 2; i <= f->curstat; i++) 524 xfree(f->posns[i]); 525 k = *f->posns[0]; 526 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) 527 overflo("out of space in pmatch"); 528 for (i = 0; i <= k; i++) 529 (f->posns[2])[i] = (f->posns[0])[i]; 530 f->initstat = f->curstat = 2; 531 f->out[2] = f->out[0]; 532 for (i = 0; i < NCHARS; i++) 533 f->gototab[2][i] = 0; 534 } 535 } while (*p++ != 0); 536 return (0); 537 } 538 539 int nematch(fa *f, const char *p0) /* non-empty match, for sub */ 540 { 541 int s, ns; 542 uschar *p = (uschar *) p0; 543 uschar *q; 544 int i, k; 545 546 /* s = f->reset ? makeinit(f,1) : f->initstat; */ 547 if (f->reset) { 548 f->initstat = s = makeinit(f,1); 549 } else { 550 s = f->initstat; 551 } 552 patlen = -1; 553 while (*p) { 554 q = p; 555 do { 556 if (f->out[s]) /* final state */ 557 patlen = q-p; 558 assert(*q < NCHARS); 559 if ((ns = f->gototab[s][*q]) != 0) 560 s = ns; 561 else 562 s = cgoto(f, s, *q); 563 if (s == 1) { /* no transition */ 564 if (patlen > 0) { 565 patbeg = (char *) p; 566 return(1); 567 } else 568 goto nnextin; /* no nonempty match */ 569 } 570 } while (*q++ != 0); 571 if (f->out[s]) 572 patlen = q-p-1; /* don't count $ */ 573 if (patlen > 0 ) { 574 patbeg = (char *) p; 575 return(1); 576 } 577 nnextin: 578 s = 2; 579 if (f->reset) { 580 for (i = 2; i <= f->curstat; i++) 581 xfree(f->posns[i]); 582 k = *f->posns[0]; 583 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) 584 overflo("out of state space"); 585 for (i = 0; i <= k; i++) 586 (f->posns[2])[i] = (f->posns[0])[i]; 587 f->initstat = f->curstat = 2; 588 f->out[2] = f->out[0]; 589 for (i = 0; i < NCHARS; i++) 590 f->gototab[2][i] = 0; 591 } 592 p++; 593 } 594 return (0); 595 } 596 597 Node *reparse(const char *p) /* parses regular expression pointed to by p */ 598 { /* uses relex() to scan regular expression */ 599 Node *np; 600 601 dprintf( ("reparse <%s>\n", p) ); 602 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ 603 rtok = relex(); 604 /* GNU compatibility: an empty regexp matches anything */ 605 if (rtok == '\0') 606 /* FATAL("empty regular expression"); previous */ 607 return(op2(ALL, NIL, NIL)); 608 np = regexp(); 609 if (rtok != '\0') 610 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 611 return(np); 612 } 613 614 Node *regexp(void) /* top-level parse of reg expr */ 615 { 616 return (alt(concat(primary()))); 617 } 618 619 Node *primary(void) 620 { 621 Node *np; 622 623 switch (rtok) { 624 case CHAR: 625 np = op2(CHAR, NIL, itonp(rlxval)); 626 rtok = relex(); 627 return (unary(np)); 628 case ALL: 629 rtok = relex(); 630 return (unary(op2(ALL, NIL, NIL))); 631 case DOT: 632 rtok = relex(); 633 return (unary(op2(DOT, NIL, NIL))); 634 case CCL: 635 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); 636 rtok = relex(); 637 return (unary(np)); 638 case NCCL: 639 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); 640 rtok = relex(); 641 return (unary(np)); 642 case '^': 643 rtok = relex(); 644 return (unary(op2(CHAR, NIL, itonp(HAT)))); 645 case '$': 646 rtok = relex(); 647 return (unary(op2(CHAR, NIL, NIL))); 648 case '(': 649 rtok = relex(); 650 if (rtok == ')') { /* special pleading for () */ 651 rtok = relex(); 652 return unary(op2(CCL, NIL, (Node *) tostring(""))); 653 } 654 np = regexp(); 655 if (rtok == ')') { 656 rtok = relex(); 657 return (unary(np)); 658 } 659 else 660 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 661 default: 662 FATAL("illegal primary in regular expression %s at %s", lastre, prestr); 663 } 664 return 0; /*NOTREACHED*/ 665 } 666 667 Node *concat(Node *np) 668 { 669 switch (rtok) { 670 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': 671 return (concat(op2(CAT, np, primary()))); 672 } 673 return (np); 674 } 675 676 Node *alt(Node *np) 677 { 678 if (rtok == OR) { 679 rtok = relex(); 680 return (alt(op2(OR, np, concat(primary())))); 681 } 682 return (np); 683 } 684 685 Node *unary(Node *np) 686 { 687 switch (rtok) { 688 case STAR: 689 rtok = relex(); 690 return (unary(op2(STAR, np, NIL))); 691 case PLUS: 692 rtok = relex(); 693 return (unary(op2(PLUS, np, NIL))); 694 case QUEST: 695 rtok = relex(); 696 return (unary(op2(QUEST, np, NIL))); 697 default: 698 return (np); 699 } 700 } 701 702 /* 703 * Character class definitions conformant to the POSIX locale as 704 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source 705 * and operating character sets are both ASCII (ISO646) or supersets 706 * thereof. 707 * 708 * Note that to avoid overflowing the temporary buffer used in 709 * relex(), the expanded character class (prior to range expansion) 710 * must be less than twice the size of their full name. 711 */ 712 713 /* Because isblank doesn't show up in any of the header files on any 714 * system i use, it's defined here. if some other locale has a richer 715 * definition of "blank", define HAS_ISBLANK and provide your own 716 * version. 717 * the parentheses here are an attempt to find a path through the maze 718 * of macro definition and/or function and/or version provided. thanks 719 * to nelson beebe for the suggestion; let's see if it works everywhere. 720 */ 721 722 #ifndef HAS_ISBLANK 723 724 int (isblank)(int c) 725 { 726 return c==' ' || c=='\t'; 727 } 728 729 #endif 730 731 struct charclass { 732 const char *cc_name; 733 int cc_namelen; 734 int (*cc_func)(int); 735 } charclasses[] = { 736 { "alnum", 5, isalnum }, 737 { "alpha", 5, isalpha }, 738 { "blank", 5, isblank }, 739 { "cntrl", 5, iscntrl }, 740 { "digit", 5, isdigit }, 741 { "graph", 5, isgraph }, 742 { "lower", 5, islower }, 743 { "print", 5, isprint }, 744 { "punct", 5, ispunct }, 745 { "space", 5, isspace }, 746 { "upper", 5, isupper }, 747 { "xdigit", 6, isxdigit }, 748 { NULL, 0, NULL }, 749 }; 750 751 752 int relex(void) /* lexical analyzer for reparse */ 753 { 754 int c, n; 755 int cflag; 756 static uschar *buf = 0; 757 static int bufsz = 100; 758 uschar *bp; 759 struct charclass *cc; 760 int i; 761 762 switch (c = *prestr++) { 763 case '|': return OR; 764 case '*': return STAR; 765 case '+': return PLUS; 766 case '?': return QUEST; 767 case '.': return DOT; 768 case '\0': prestr--; return '\0'; 769 case '^': 770 case '$': 771 case '(': 772 case ')': 773 return c; 774 case '\\': 775 rlxval = quoted((char **) &prestr); 776 return CHAR; 777 default: 778 rlxval = c; 779 return CHAR; 780 case '[': 781 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 782 FATAL("out of space in reg expr %.10s..", lastre); 783 bp = buf; 784 if (*prestr == '^') { 785 cflag = 1; 786 prestr++; 787 } 788 else 789 cflag = 0; 790 n = 2 * strlen((const char *) prestr)+1; 791 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0)) 792 FATAL("out of space for reg expr %.10s...", lastre); 793 for (; ; ) { 794 if ((c = *prestr++) == '\\') { 795 *bp++ = '\\'; 796 if ((c = *prestr++) == '\0') 797 FATAL("nonterminated character class %.20s...", lastre); 798 *bp++ = c; 799 /* } else if (c == '\n') { */ 800 /* FATAL("newline in character class %.20s...", lastre); */ 801 } else if (c == '[' && *prestr == ':') { 802 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */ 803 for (cc = charclasses; cc->cc_name; cc++) 804 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) 805 break; 806 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && 807 prestr[2 + cc->cc_namelen] == ']') { 808 prestr += cc->cc_namelen + 3; 809 for (i = 0; i < NCHARS; i++) { 810 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0)) 811 FATAL("out of space for reg expr %.10s...", lastre); 812 if (cc->cc_func(i)) { 813 *bp++ = i; 814 n++; 815 } 816 } 817 } else 818 *bp++ = c; 819 } else if (c == '\0') { 820 FATAL("nonterminated character class %.20s", lastre); 821 } else if (bp == buf) { /* 1st char is special */ 822 *bp++ = c; 823 } else if (c == ']') { 824 *bp++ = 0; 825 rlxstr = (uschar *) tostring((char *) buf); 826 if (cflag == 0) 827 return CCL; 828 else 829 return NCCL; 830 } else 831 *bp++ = c; 832 } 833 } 834 } 835 836 int cgoto(fa *f, int s, int c) 837 { 838 int i, j, k; 839 int *p, *q; 840 841 assert(c == HAT || c < NCHARS); 842 while (f->accept >= maxsetvec) { /* guessing here! */ 843 maxsetvec *= 4; 844 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 845 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 846 if (setvec == 0 || tmpset == 0) 847 overflo("out of space in cgoto()"); 848 } 849 for (i = 0; i <= f->accept; i++) 850 setvec[i] = 0; 851 setcnt = 0; 852 /* compute positions of gototab[s,c] into setvec */ 853 p = f->posns[s]; 854 for (i = 1; i <= *p; i++) { 855 if ((k = f->re[p[i]].ltype) != FINAL) { 856 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) 857 || (k == DOT && c != 0 && c != HAT) 858 || (k == ALL && c != 0) 859 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) 860 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { 861 q = f->re[p[i]].lfollow; 862 for (j = 1; j <= *q; j++) { 863 if (q[j] >= maxsetvec) { 864 maxsetvec *= 4; 865 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 866 tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int)); 867 if (setvec == 0 || tmpset == 0) 868 overflo("cgoto overflow"); 869 } 870 if (setvec[q[j]] == 0) { 871 setcnt++; 872 setvec[q[j]] = 1; 873 } 874 } 875 } 876 } 877 } 878 /* determine if setvec is a previous state */ 879 tmpset[0] = setcnt; 880 j = 1; 881 for (i = f->accept; i >= 0; i--) 882 if (setvec[i]) { 883 tmpset[j++] = i; 884 } 885 /* tmpset == previous state? */ 886 for (i = 1; i <= f->curstat; i++) { 887 p = f->posns[i]; 888 if ((k = tmpset[0]) != p[0]) 889 goto different; 890 for (j = 1; j <= k; j++) 891 if (tmpset[j] != p[j]) 892 goto different; 893 /* setvec is state i */ 894 f->gototab[s][c] = i; 895 return i; 896 different:; 897 } 898 899 /* add tmpset to current set of states */ 900 if (f->curstat >= NSTATES-1) { 901 f->curstat = 2; 902 f->reset = 1; 903 for (i = 2; i < NSTATES; i++) 904 xfree(f->posns[i]); 905 } else 906 ++(f->curstat); 907 for (i = 0; i < NCHARS; i++) 908 f->gototab[f->curstat][i] = 0; 909 xfree(f->posns[f->curstat]); 910 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) 911 overflo("out of space in cgoto"); 912 913 f->posns[f->curstat] = p; 914 f->gototab[s][c] = f->curstat; 915 for (i = 0; i <= setcnt; i++) 916 p[i] = tmpset[i]; 917 if (setvec[f->accept]) 918 f->out[f->curstat] = 1; 919 else 920 f->out[f->curstat] = 0; 921 return f->curstat; 922 } 923 924 925 void freefa(fa *f) /* free a finite automaton */ 926 { 927 int i; 928 929 if (f == NULL) 930 return; 931 for (i = 0; i <= f->curstat; i++) 932 xfree(f->posns[i]); 933 for (i = 0; i <= f->accept; i++) { 934 xfree(f->re[i].lfollow); 935 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) 936 xfree((f->re[i].lval.np)); 937 } 938 xfree(f->restr); 939 xfree(f); 940 } 941