1 /* $OpenBSD: b.c,v 1.11 2002/12/19 21:24:28 millert Exp $ */ 2 /**************************************************************** 3 Copyright (C) Lucent Technologies 1997 4 All Rights Reserved 5 6 Permission to use, copy, modify, and distribute this software and 7 its documentation for any purpose and without fee is hereby 8 granted, provided that the above copyright notice appear in all 9 copies and that both that the copyright notice and this 10 permission notice and warranty disclaimer appear in supporting 11 documentation, and that the name Lucent Technologies or any of 12 its entities not be used in advertising or publicity pertaining 13 to distribution of the software without specific, written prior 14 permission. 15 16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 23 THIS SOFTWARE. 24 ****************************************************************/ 25 26 /* lasciate ogne speranza, voi ch'entrate. */ 27 28 #define DEBUG 29 30 #include <ctype.h> 31 #include <stdio.h> 32 #include <string.h> 33 #include <stdlib.h> 34 #include "awk.h" 35 #include "ytab.h" 36 37 #define HAT (NCHARS-2) /* matches ^ in regular expr */ 38 /* NCHARS is 2**n */ 39 #define MAXLIN 22 40 41 #define type(v) (v)->nobj /* badly overloaded here */ 42 #define info(v) (v)->ntype /* badly overloaded here */ 43 #define left(v) (v)->narg[0] 44 #define right(v) (v)->narg[1] 45 #define parent(v) (v)->nnext 46 47 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: 48 #define UNARY case STAR: case PLUS: case QUEST: 49 50 /* encoding in tree Nodes: 51 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): 52 left is index, right contains value or pointer to value 53 unary (STAR, PLUS, QUEST): left is child, right is null 54 binary (CAT, OR): left and right are children 55 parent contains pointer to parent 56 */ 57 58 59 int *setvec; 60 int *tmpset; 61 int maxsetvec = 0; 62 63 int rtok; /* next token in current re */ 64 int rlxval; 65 static uschar *rlxstr; 66 static uschar *prestr; /* current position in current re */ 67 static uschar *lastre; /* origin of last re */ 68 69 static int setcnt; 70 static int poscnt; 71 72 char *patbeg; 73 int patlen; 74 75 #define NFA 20 /* cache this many dynamic fa's */ 76 fa *fatab[NFA]; 77 int nfatab = 0; /* entries in fatab */ 78 79 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ 80 { 81 int i, use, nuse; 82 fa *pfa; 83 static int now = 1; 84 85 if (setvec == 0) { /* first time through any RE */ 86 maxsetvec = MAXLIN; 87 setvec = (int *) malloc(maxsetvec * sizeof(int)); 88 tmpset = (int *) malloc(maxsetvec * sizeof(int)); 89 if (setvec == 0 || tmpset == 0) 90 overflo("out of space initializing makedfa"); 91 } 92 93 if (compile_time) /* a constant for sure */ 94 return mkdfa(s, anchor); 95 for (i = 0; i < nfatab; i++) /* is it there already? */ 96 if (fatab[i]->anchor == anchor 97 && strcmp((const char *) fatab[i]->restr, s) == 0) { 98 fatab[i]->use = now++; 99 return fatab[i]; 100 } 101 pfa = mkdfa(s, anchor); 102 if (nfatab < NFA) { /* room for another */ 103 fatab[nfatab] = pfa; 104 fatab[nfatab]->use = now++; 105 nfatab++; 106 return pfa; 107 } 108 use = fatab[0]->use; /* replace least-recently used */ 109 nuse = 0; 110 for (i = 1; i < nfatab; i++) 111 if (fatab[i]->use < use) { 112 use = fatab[i]->use; 113 nuse = i; 114 } 115 freefa(fatab[nuse]); 116 fatab[nuse] = pfa; 117 pfa->use = now++; 118 return pfa; 119 } 120 121 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ 122 /* anchor = 1 for anchored matches, else 0 */ 123 { 124 Node *p, *p1; 125 fa *f; 126 127 p = reparse(s); 128 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); 129 /* put ALL STAR in front of reg. exp. */ 130 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); 131 /* put FINAL after reg. exp. */ 132 133 poscnt = 0; 134 penter(p1); /* enter parent pointers and leaf indices */ 135 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) 136 overflo("out of space for fa"); 137 f->accept = poscnt-1; /* penter has computed number of positions in re */ 138 cfoll(f, p1); /* set up follow sets */ 139 freetr(p1); 140 if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) 141 overflo("out of space in makedfa"); 142 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) 143 overflo("out of space in makedfa"); 144 *f->posns[1] = 0; 145 f->initstat = makeinit(f, anchor); 146 f->anchor = anchor; 147 f->restr = (uschar *) tostring(s); 148 return f; 149 } 150 151 int makeinit(fa *f, int anchor) 152 { 153 int i, k; 154 155 f->curstat = 2; 156 f->out[2] = 0; 157 f->reset = 0; 158 k = *(f->re[0].lfollow); 159 xfree(f->posns[2]); 160 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 161 overflo("out of space in makeinit"); 162 for (i=0; i <= k; i++) { 163 (f->posns[2])[i] = (f->re[0].lfollow)[i]; 164 } 165 if ((f->posns[2])[1] == f->accept) 166 f->out[2] = 1; 167 for (i=0; i < NCHARS; i++) 168 f->gototab[2][i] = 0; 169 f->curstat = cgoto(f, 2, HAT); 170 if (anchor) { 171 *f->posns[2] = k-1; /* leave out position 0 */ 172 for (i=0; i < k; i++) { 173 (f->posns[0])[i] = (f->posns[2])[i]; 174 } 175 176 f->out[0] = f->out[2]; 177 if (f->curstat != 2) 178 --(*f->posns[f->curstat]); 179 } 180 return f->curstat; 181 } 182 183 void penter(Node *p) /* set up parent pointers and leaf indices */ 184 { 185 switch (type(p)) { 186 LEAF 187 info(p) = poscnt; 188 poscnt++; 189 break; 190 UNARY 191 penter(left(p)); 192 parent(left(p)) = p; 193 break; 194 case CAT: 195 case OR: 196 penter(left(p)); 197 penter(right(p)); 198 parent(left(p)) = p; 199 parent(right(p)) = p; 200 break; 201 default: /* can't happen */ 202 FATAL("can't happen: unknown type %d in penter", type(p)); 203 break; 204 } 205 } 206 207 void freetr(Node *p) /* free parse tree */ 208 { 209 switch (type(p)) { 210 LEAF 211 xfree(p); 212 break; 213 UNARY 214 freetr(left(p)); 215 xfree(p); 216 break; 217 case CAT: 218 case OR: 219 freetr(left(p)); 220 freetr(right(p)); 221 xfree(p); 222 break; 223 default: /* can't happen */ 224 FATAL("can't happen: unknown type %d in freetr", type(p)); 225 break; 226 } 227 } 228 229 /* in the parsing of regular expressions, metacharacters like . have */ 230 /* to be seen literally; \056 is not a metacharacter. */ 231 232 int hexstr(char **pp) /* find and eval hex string at pp, return new p */ 233 { /* only pick up one 8-bit byte (2 chars) */ 234 uschar *p; 235 int n = 0; 236 int i; 237 238 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { 239 if (isdigit(*p)) 240 n = 16 * n + *p - '0'; 241 else if (*p >= 'a' && *p <= 'f') 242 n = 16 * n + *p - 'a' + 10; 243 else if (*p >= 'A' && *p <= 'F') 244 n = 16 * n + *p - 'A' + 10; 245 } 246 *pp = (char *) p; 247 return n; 248 } 249 250 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ 251 252 int quoted(char **pp) /* pick up next thing after a \\ */ 253 /* and increment *pp */ 254 { 255 char *p = *pp; 256 int c; 257 258 if ((c = *p++) == 't') 259 c = '\t'; 260 else if (c == 'n') 261 c = '\n'; 262 else if (c == 'f') 263 c = '\f'; 264 else if (c == 'r') 265 c = '\r'; 266 else if (c == 'b') 267 c = '\b'; 268 else if (c == '\\') 269 c = '\\'; 270 else if (c == 'x') { /* hexadecimal goo follows */ 271 c = hexstr(&p); /* this adds a null if number is invalid */ 272 } else if (isoctdigit(c)) { /* \d \dd \ddd */ 273 int n = c - '0'; 274 if (isoctdigit(*p)) { 275 n = 8 * n + *p++ - '0'; 276 if (isoctdigit(*p)) 277 n = 8 * n + *p++ - '0'; 278 } 279 c = n; 280 } /* else */ 281 /* c = c; */ 282 *pp = p; 283 return c; 284 } 285 286 char *cclenter(const char *argp) /* add a character class */ 287 { 288 int i, c, c2; 289 uschar *p = (uschar *) argp; 290 uschar *op, *bp; 291 static uschar *buf = 0; 292 static int bufsz = 100; 293 294 op = p; 295 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 296 FATAL("out of space for character class [%.10s...] 1", p); 297 bp = buf; 298 for (i = 0; (c = *p++) != 0; ) { 299 if (c == '\\') { 300 c = quoted((char **) &p); 301 } else if (c == '-' && i > 0 && bp[-1] != 0) { 302 if (*p != 0) { 303 c = bp[-1]; 304 c2 = *p++; 305 if (c2 == '\\') 306 c2 = quoted((char **) &p); 307 if (c > c2) { /* empty; ignore */ 308 bp--; 309 i--; 310 continue; 311 } 312 while (c < c2) { 313 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) 314 FATAL("out of space for character class [%.10s...] 2", p); 315 *bp++ = ++c; 316 i++; 317 } 318 continue; 319 } 320 } 321 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) 322 FATAL("out of space for character class [%.10s...] 3", p); 323 *bp++ = c; 324 i++; 325 } 326 *bp = 0; 327 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); 328 xfree(op); 329 return (char *) tostring((char *) buf); 330 } 331 332 void overflo(const char *s) 333 { 334 FATAL("regular expression too big: %.30s...", s); 335 } 336 337 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ 338 { 339 int i; 340 int *p; 341 342 switch (type(v)) { 343 LEAF 344 f->re[info(v)].ltype = type(v); 345 f->re[info(v)].lval.np = right(v); 346 while (f->accept >= maxsetvec) { /* guessing here! */ 347 maxsetvec *= 4; 348 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 349 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 350 if (setvec == 0 || tmpset == 0) 351 overflo("out of space in cfoll()"); 352 } 353 for (i = 0; i <= f->accept; i++) 354 setvec[i] = 0; 355 setcnt = 0; 356 follow(v); /* computes setvec and setcnt */ 357 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 358 overflo("out of space building follow set"); 359 f->re[info(v)].lfollow = p; 360 *p = setcnt; 361 for (i = f->accept; i >= 0; i--) 362 if (setvec[i] == 1) 363 *++p = i; 364 break; 365 UNARY 366 cfoll(f,left(v)); 367 break; 368 case CAT: 369 case OR: 370 cfoll(f,left(v)); 371 cfoll(f,right(v)); 372 break; 373 default: /* can't happen */ 374 FATAL("can't happen: unknown type %d in cfoll", type(v)); 375 } 376 } 377 378 int first(Node *p) /* collects initially active leaves of p into setvec */ 379 /* returns 1 if p matches empty string */ 380 { 381 int b, lp; 382 383 switch (type(p)) { 384 LEAF 385 lp = info(p); /* look for high-water mark of subscripts */ 386 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ 387 maxsetvec *= 4; 388 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 389 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 390 if (setvec == 0 || tmpset == 0) 391 overflo("out of space in first()"); 392 } 393 if (setvec[lp] != 1) { 394 setvec[lp] = 1; 395 setcnt++; 396 } 397 if (type(p) == CCL && (*(char *) right(p)) == '\0') 398 return(0); /* empty CCL */ 399 else return(1); 400 case PLUS: 401 if (first(left(p)) == 0) return(0); 402 return(1); 403 case STAR: 404 case QUEST: 405 first(left(p)); 406 return(0); 407 case CAT: 408 if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 409 return(1); 410 case OR: 411 b = first(right(p)); 412 if (first(left(p)) == 0 || b == 0) return(0); 413 return(1); 414 } 415 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ 416 return(-1); 417 } 418 419 void follow(Node *v) /* collects leaves that can follow v into setvec */ 420 { 421 Node *p; 422 423 if (type(v) == FINAL) 424 return; 425 p = parent(v); 426 switch (type(p)) { 427 case STAR: 428 case PLUS: 429 first(v); 430 follow(p); 431 return; 432 433 case OR: 434 case QUEST: 435 follow(p); 436 return; 437 438 case CAT: 439 if (v == left(p)) { /* v is left child of p */ 440 if (first(right(p)) == 0) { 441 follow(p); 442 return; 443 } 444 } else /* v is right child */ 445 follow(p); 446 return; 447 } 448 } 449 450 int member(int c, const char *sarg) /* is c in s? */ 451 { 452 uschar *s = (uschar *) sarg; 453 454 while (*s) 455 if (c == *s++) 456 return(1); 457 return(0); 458 } 459 460 int match(fa *f, const char *p0) /* shortest match ? */ 461 { 462 int s, ns; 463 uschar *p = (uschar *) p0; 464 465 s = f->reset ? makeinit(f,0) : f->initstat; 466 if (f->out[s]) 467 return(1); 468 do { 469 if ((ns = f->gototab[s][*p]) != 0) 470 s = ns; 471 else 472 s = cgoto(f, s, *p); 473 if (f->out[s]) 474 return(1); 475 } while (*p++ != 0); 476 return(0); 477 } 478 479 int pmatch(fa *f, const char *p0) /* longest match, for sub */ 480 { 481 int s, ns; 482 uschar *p = (uschar *) p0; 483 uschar *q; 484 int i, k; 485 486 s = f->reset ? makeinit(f,1) : f->initstat; 487 patbeg = (char *) p; 488 patlen = -1; 489 do { 490 q = p; 491 do { 492 if (f->out[s]) /* final state */ 493 patlen = q-p; 494 if ((ns = f->gototab[s][*q]) != 0) 495 s = ns; 496 else 497 s = cgoto(f, s, *q); 498 if (s == 1) { /* no transition */ 499 if (patlen >= 0) { 500 patbeg = (char *) p; 501 return(1); 502 } 503 else 504 goto nextin; /* no match */ 505 } 506 } while (*q++ != 0); 507 if (f->out[s]) 508 patlen = q-p-1; /* don't count $ */ 509 if (patlen >= 0) { 510 patbeg = (char *) p; 511 return(1); 512 } 513 nextin: 514 s = 2; 515 if (f->reset) { 516 for (i = 2; i <= f->curstat; i++) 517 xfree(f->posns[i]); 518 k = *f->posns[0]; 519 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 520 overflo("out of space in pmatch"); 521 for (i = 0; i <= k; i++) 522 (f->posns[2])[i] = (f->posns[0])[i]; 523 f->initstat = f->curstat = 2; 524 f->out[2] = f->out[0]; 525 for (i = 0; i < NCHARS; i++) 526 f->gototab[2][i] = 0; 527 } 528 } while (*p++ != 0); 529 return (0); 530 } 531 532 int nematch(fa *f, const char *p0) /* non-empty match, for sub */ 533 { 534 int s, ns; 535 uschar *p = (uschar *) p0; 536 uschar *q; 537 int i, k; 538 539 s = f->reset ? makeinit(f,1) : f->initstat; 540 patlen = -1; 541 while (*p) { 542 q = p; 543 do { 544 if (f->out[s]) /* final state */ 545 patlen = q-p; 546 if ((ns = f->gototab[s][*q]) != 0) 547 s = ns; 548 else 549 s = cgoto(f, s, *q); 550 if (s == 1) { /* no transition */ 551 if (patlen > 0) { 552 patbeg = (char *) p; 553 return(1); 554 } else 555 goto nnextin; /* no nonempty match */ 556 } 557 } while (*q++ != 0); 558 if (f->out[s]) 559 patlen = q-p-1; /* don't count $ */ 560 if (patlen > 0 ) { 561 patbeg = (char *) p; 562 return(1); 563 } 564 nnextin: 565 s = 2; 566 if (f->reset) { 567 for (i = 2; i <= f->curstat; i++) 568 xfree(f->posns[i]); 569 k = *f->posns[0]; 570 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 571 overflo("out of state space"); 572 for (i = 0; i <= k; i++) 573 (f->posns[2])[i] = (f->posns[0])[i]; 574 f->initstat = f->curstat = 2; 575 f->out[2] = f->out[0]; 576 for (i = 0; i < NCHARS; i++) 577 f->gototab[2][i] = 0; 578 } 579 p++; 580 } 581 return (0); 582 } 583 584 Node *reparse(const char *p) /* parses regular expression pointed to by p */ 585 { /* uses relex() to scan regular expression */ 586 Node *np; 587 588 dprintf( ("reparse <%s>\n", p) ); 589 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ 590 rtok = relex(); 591 /* GNU compatibility: an empty regexp matches anything */ 592 if (rtok == '\0') 593 /* FATAL("empty regular expression"); previous */ 594 return(op2(ALL, NIL, NIL)); 595 np = regexp(); 596 if (rtok != '\0') 597 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 598 return(np); 599 } 600 601 Node *regexp(void) /* top-level parse of reg expr */ 602 { 603 return (alt(concat(primary()))); 604 } 605 606 Node *primary(void) 607 { 608 Node *np; 609 610 switch (rtok) { 611 case CHAR: 612 np = op2(CHAR, NIL, itonp(rlxval)); 613 rtok = relex(); 614 return (unary(np)); 615 case ALL: 616 rtok = relex(); 617 return (unary(op2(ALL, NIL, NIL))); 618 case DOT: 619 rtok = relex(); 620 return (unary(op2(DOT, NIL, NIL))); 621 case CCL: 622 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); 623 rtok = relex(); 624 return (unary(np)); 625 case NCCL: 626 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); 627 rtok = relex(); 628 return (unary(np)); 629 case '^': 630 rtok = relex(); 631 return (unary(op2(CHAR, NIL, itonp(HAT)))); 632 case '$': 633 rtok = relex(); 634 return (unary(op2(CHAR, NIL, NIL))); 635 case '(': 636 rtok = relex(); 637 if (rtok == ')') { /* special pleading for () */ 638 rtok = relex(); 639 return unary(op2(CCL, NIL, (Node *) tostring(""))); 640 } 641 np = regexp(); 642 if (rtok == ')') { 643 rtok = relex(); 644 return (unary(np)); 645 } 646 else 647 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 648 default: 649 FATAL("illegal primary in regular expression %s at %s", lastre, prestr); 650 } 651 return 0; /*NOTREACHED*/ 652 } 653 654 Node *concat(Node *np) 655 { 656 switch (rtok) { 657 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': 658 return (concat(op2(CAT, np, primary()))); 659 } 660 return (np); 661 } 662 663 Node *alt(Node *np) 664 { 665 if (rtok == OR) { 666 rtok = relex(); 667 return (alt(op2(OR, np, concat(primary())))); 668 } 669 return (np); 670 } 671 672 Node *unary(Node *np) 673 { 674 switch (rtok) { 675 case STAR: 676 rtok = relex(); 677 return (unary(op2(STAR, np, NIL))); 678 case PLUS: 679 rtok = relex(); 680 return (unary(op2(PLUS, np, NIL))); 681 case QUEST: 682 rtok = relex(); 683 return (unary(op2(QUEST, np, NIL))); 684 default: 685 return (np); 686 } 687 } 688 689 /* 690 * Character class definitions conformant to the POSIX locale as 691 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source 692 * and operating character sets are both ASCII (ISO646) or supersets 693 * thereof. 694 * 695 * Note that to avoid overflowing the temporary buffer used in 696 * relex(), the expanded character class (prior to range expansion) 697 * must be less than twice the size of their full name. 698 */ 699 struct charclass { 700 const char *cc_name; 701 int cc_namelen; 702 const char *cc_expand; 703 } charclasses[] = { 704 { "alnum", 5, "0-9A-Za-z" }, 705 { "alpha", 5, "A-Za-z" }, 706 { "blank", 5, " \t" }, 707 { "cntrl", 5, "\000-\037\177" }, 708 { "digit", 5, "0-9" }, 709 { "graph", 5, "\041-\176" }, 710 { "lower", 5, "a-z" }, 711 { "print", 5, " \041-\176" }, 712 { "punct", 5, "\041-\057\072-\100\133-\140\173-\176" }, 713 { "space", 5, " \f\n\r\t\v" }, 714 { "upper", 5, "A-Z" }, 715 { "xdigit", 6, "0-9A-Fa-f" }, 716 { NULL, 0, NULL }, 717 }; 718 719 720 int relex(void) /* lexical analyzer for reparse */ 721 { 722 int c, n; 723 int cflag; 724 static uschar *buf = 0; 725 static int bufsz = 100; 726 uschar *bp; 727 struct charclass *cc; 728 const uschar *p; 729 730 switch (c = *prestr++) { 731 case '|': return OR; 732 case '*': return STAR; 733 case '+': return PLUS; 734 case '?': return QUEST; 735 case '.': return DOT; 736 case '\0': prestr--; return '\0'; 737 case '^': 738 case '$': 739 case '(': 740 case ')': 741 return c; 742 case '\\': 743 rlxval = quoted((char **) &prestr); 744 return CHAR; 745 default: 746 rlxval = c; 747 return CHAR; 748 case '[': 749 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 750 FATAL("out of space in reg expr %.10s..", lastre); 751 bp = buf; 752 if (*prestr == '^') { 753 cflag = 1; 754 prestr++; 755 } 756 else 757 cflag = 0; 758 n = 2 * strlen((const char *) prestr)+1; 759 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0)) 760 FATAL("out of space for reg expr %.10s...", lastre); 761 for (; ; ) { 762 if ((c = *prestr++) == '\\') { 763 *bp++ = '\\'; 764 if ((c = *prestr++) == '\0') 765 FATAL("nonterminated character class %.20s...", lastre); 766 *bp++ = c; 767 /* } else if (c == '\n') { */ 768 /* FATAL("newline in character class %.20s...", lastre); */ 769 } else if (c == '[' && *prestr == ':') { 770 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */ 771 for (cc = charclasses; cc->cc_name; cc++) 772 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) 773 break; 774 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && 775 prestr[2 + cc->cc_namelen] == ']') { 776 prestr += cc->cc_namelen + 3; 777 for (p = (const uschar *) cc->cc_expand; *p; p++) 778 *bp++ = *p; 779 } else 780 *bp++ = c; 781 } else if (c == '\0') { 782 FATAL("nonterminated character class %.20s", lastre); 783 } else if (bp == buf) { /* 1st char is special */ 784 *bp++ = c; 785 } else if (c == ']') { 786 *bp++ = 0; 787 rlxstr = (uschar *) tostring((char *) buf); 788 if (cflag == 0) 789 return CCL; 790 else 791 return NCCL; 792 } else 793 *bp++ = c; 794 } 795 } 796 } 797 798 int cgoto(fa *f, int s, int c) 799 { 800 int i, j, k; 801 int *p, *q; 802 803 if (c < 0 || c > 255) 804 FATAL("can't happen: neg char %d in cgoto", c); 805 while (f->accept >= maxsetvec) { /* guessing here! */ 806 maxsetvec *= 4; 807 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 808 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 809 if (setvec == 0 || tmpset == 0) 810 overflo("out of space in cgoto()"); 811 } 812 for (i = 0; i <= f->accept; i++) 813 setvec[i] = 0; 814 setcnt = 0; 815 /* compute positions of gototab[s,c] into setvec */ 816 p = f->posns[s]; 817 for (i = 1; i <= *p; i++) { 818 if ((k = f->re[p[i]].ltype) != FINAL) { 819 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) 820 || (k == DOT && c != 0 && c != HAT) 821 || (k == ALL && c != 0) 822 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) 823 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { 824 q = f->re[p[i]].lfollow; 825 for (j = 1; j <= *q; j++) { 826 if (q[j] >= maxsetvec) { 827 maxsetvec *= 4; 828 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 829 tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int)); 830 if (setvec == 0 || tmpset == 0) 831 overflo("cgoto overflow"); 832 } 833 if (setvec[q[j]] == 0) { 834 setcnt++; 835 setvec[q[j]] = 1; 836 } 837 } 838 } 839 } 840 } 841 /* determine if setvec is a previous state */ 842 tmpset[0] = setcnt; 843 j = 1; 844 for (i = f->accept; i >= 0; i--) 845 if (setvec[i]) { 846 tmpset[j++] = i; 847 } 848 /* tmpset == previous state? */ 849 for (i = 1; i <= f->curstat; i++) { 850 p = f->posns[i]; 851 if ((k = tmpset[0]) != p[0]) 852 goto different; 853 for (j = 1; j <= k; j++) 854 if (tmpset[j] != p[j]) 855 goto different; 856 /* setvec is state i */ 857 f->gototab[s][c] = i; 858 return i; 859 different:; 860 } 861 862 /* add tmpset to current set of states */ 863 if (f->curstat >= NSTATES-1) { 864 f->curstat = 2; 865 f->reset = 1; 866 for (i = 2; i < NSTATES; i++) 867 xfree(f->posns[i]); 868 } else 869 ++(f->curstat); 870 for (i = 0; i < NCHARS; i++) 871 f->gototab[f->curstat][i] = 0; 872 xfree(f->posns[f->curstat]); 873 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 874 overflo("out of space in cgoto"); 875 876 f->posns[f->curstat] = p; 877 f->gototab[s][c] = f->curstat; 878 for (i = 0; i <= setcnt; i++) 879 p[i] = tmpset[i]; 880 if (setvec[f->accept]) 881 f->out[f->curstat] = 1; 882 else 883 f->out[f->curstat] = 0; 884 return f->curstat; 885 } 886 887 888 void freefa(fa *f) /* free a finite automaton */ 889 { 890 int i; 891 892 if (f == NULL) 893 return; 894 for (i = 0; i <= f->curstat; i++) 895 xfree(f->posns[i]); 896 for (i = 0; i <= f->accept; i++) { 897 xfree(f->re[i].lfollow); 898 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) 899 xfree((f->re[i].lval.np)); 900 } 901 xfree(f->restr); 902 xfree(f); 903 } 904