1 /* $OpenBSD: b.c,v 1.10 2001/09/08 00:12:40 millert Exp $ */ 2 /**************************************************************** 3 Copyright (C) Lucent Technologies 1997 4 All Rights Reserved 5 6 Permission to use, copy, modify, and distribute this software and 7 its documentation for any purpose and without fee is hereby 8 granted, provided that the above copyright notice appear in all 9 copies and that both that the copyright notice and this 10 permission notice and warranty disclaimer appear in supporting 11 documentation, and that the name Lucent Technologies or any of 12 its entities not be used in advertising or publicity pertaining 13 to distribution of the software without specific, written prior 14 permission. 15 16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 23 THIS SOFTWARE. 24 ****************************************************************/ 25 26 /* lasciate ogne speranza, voi ch'entrate. */ 27 28 #define DEBUG 29 30 #include <ctype.h> 31 #include <stdio.h> 32 #include <string.h> 33 #include <stdlib.h> 34 #include "awk.h" 35 #include "ytab.h" 36 37 #define HAT (NCHARS-2) /* matches ^ in regular expr */ 38 /* NCHARS is 2**n */ 39 #define MAXLIN 22 40 41 #define type(v) (v)->nobj /* badly overloaded here */ 42 #define info(v) (v)->ntype /* badly overloaded here */ 43 #define left(v) (v)->narg[0] 44 #define right(v) (v)->narg[1] 45 #define parent(v) (v)->nnext 46 47 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: 48 #define UNARY case STAR: case PLUS: case QUEST: 49 50 /* encoding in tree Nodes: 51 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): 52 left is index, right contains value or pointer to value 53 unary (STAR, PLUS, QUEST): left is child, right is null 54 binary (CAT, OR): left and right are children 55 parent contains pointer to parent 56 */ 57 58 59 int *setvec; 60 int *tmpset; 61 int maxsetvec = 0; 62 63 int rtok; /* next token in current re */ 64 int rlxval; 65 static uschar *rlxstr; 66 static uschar *prestr; /* current position in current re */ 67 static uschar *lastre; /* origin of last re */ 68 69 static int setcnt; 70 static int poscnt; 71 72 char *patbeg; 73 int patlen; 74 75 #define NFA 20 /* cache this many dynamic fa's */ 76 fa *fatab[NFA]; 77 int nfatab = 0; /* entries in fatab */ 78 79 fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */ 80 { 81 int i, use, nuse; 82 fa *pfa; 83 static int now = 1; 84 85 if (setvec == 0) { /* first time through any RE */ 86 maxsetvec = MAXLIN; 87 setvec = (int *) malloc(maxsetvec * sizeof(int)); 88 tmpset = (int *) malloc(maxsetvec * sizeof(int)); 89 if (setvec == 0 || tmpset == 0) 90 overflo("out of space initializing makedfa"); 91 } 92 93 if (compile_time) /* a constant for sure */ 94 return mkdfa(s, anchor); 95 for (i = 0; i < nfatab; i++) /* is it there already? */ 96 if (fatab[i]->anchor == anchor 97 && strcmp(fatab[i]->restr, s) == 0) { 98 fatab[i]->use = now++; 99 return fatab[i]; 100 } 101 pfa = mkdfa(s, anchor); 102 if (nfatab < NFA) { /* room for another */ 103 fatab[nfatab] = pfa; 104 fatab[nfatab]->use = now++; 105 nfatab++; 106 return pfa; 107 } 108 use = fatab[0]->use; /* replace least-recently used */ 109 nuse = 0; 110 for (i = 1; i < nfatab; i++) 111 if (fatab[i]->use < use) { 112 use = fatab[i]->use; 113 nuse = i; 114 } 115 freefa(fatab[nuse]); 116 fatab[nuse] = pfa; 117 pfa->use = now++; 118 return pfa; 119 } 120 121 fa *mkdfa(char *s, int anchor) /* does the real work of making a dfa */ 122 /* anchor = 1 for anchored matches, else 0 */ 123 { 124 Node *p, *p1; 125 fa *f; 126 127 p = reparse(s); 128 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); 129 /* put ALL STAR in front of reg. exp. */ 130 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); 131 /* put FINAL after reg. exp. */ 132 133 poscnt = 0; 134 penter(p1); /* enter parent pointers and leaf indices */ 135 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) 136 overflo("out of space for fa"); 137 f->accept = poscnt-1; /* penter has computed number of positions in re */ 138 cfoll(f, p1); /* set up follow sets */ 139 freetr(p1); 140 if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) 141 overflo("out of space in makedfa"); 142 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) 143 overflo("out of space in makedfa"); 144 *f->posns[1] = 0; 145 f->initstat = makeinit(f, anchor); 146 f->anchor = anchor; 147 f->restr = (uschar *) tostring(s); 148 return f; 149 } 150 151 int makeinit(fa *f, int anchor) 152 { 153 int i, k; 154 155 f->curstat = 2; 156 f->out[2] = 0; 157 f->reset = 0; 158 k = *(f->re[0].lfollow); 159 xfree(f->posns[2]); 160 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 161 overflo("out of space in makeinit"); 162 for (i=0; i <= k; i++) { 163 (f->posns[2])[i] = (f->re[0].lfollow)[i]; 164 } 165 if ((f->posns[2])[1] == f->accept) 166 f->out[2] = 1; 167 for (i=0; i < NCHARS; i++) 168 f->gototab[2][i] = 0; 169 f->curstat = cgoto(f, 2, HAT); 170 if (anchor) { 171 *f->posns[2] = k-1; /* leave out position 0 */ 172 for (i=0; i < k; i++) { 173 (f->posns[0])[i] = (f->posns[2])[i]; 174 } 175 176 f->out[0] = f->out[2]; 177 if (f->curstat != 2) 178 --(*f->posns[f->curstat]); 179 } 180 return f->curstat; 181 } 182 183 void penter(Node *p) /* set up parent pointers and leaf indices */ 184 { 185 switch (type(p)) { 186 LEAF 187 info(p) = poscnt; 188 poscnt++; 189 break; 190 UNARY 191 penter(left(p)); 192 parent(left(p)) = p; 193 break; 194 case CAT: 195 case OR: 196 penter(left(p)); 197 penter(right(p)); 198 parent(left(p)) = p; 199 parent(right(p)) = p; 200 break; 201 default: /* can't happen */ 202 FATAL("can't happen: unknown type %d in penter", type(p)); 203 break; 204 } 205 } 206 207 void freetr(Node *p) /* free parse tree */ 208 { 209 switch (type(p)) { 210 LEAF 211 xfree(p); 212 break; 213 UNARY 214 freetr(left(p)); 215 xfree(p); 216 break; 217 case CAT: 218 case OR: 219 freetr(left(p)); 220 freetr(right(p)); 221 xfree(p); 222 break; 223 default: /* can't happen */ 224 FATAL("can't happen: unknown type %d in freetr", type(p)); 225 break; 226 } 227 } 228 229 /* in the parsing of regular expressions, metacharacters like . have */ 230 /* to be seen literally; \056 is not a metacharacter. */ 231 232 int hexstr(char **pp) /* find and eval hex string at pp, return new p */ 233 { /* only pick up one 8-bit byte (2 chars) */ 234 uschar *p; 235 int n = 0; 236 int i; 237 238 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { 239 if (isdigit(*p)) 240 n = 16 * n + *p - '0'; 241 else if (*p >= 'a' && *p <= 'f') 242 n = 16 * n + *p - 'a' + 10; 243 else if (*p >= 'A' && *p <= 'F') 244 n = 16 * n + *p - 'A' + 10; 245 } 246 *pp = (char *) p; 247 return n; 248 } 249 250 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ 251 252 int quoted(char **pp) /* pick up next thing after a \\ */ 253 /* and increment *pp */ 254 { 255 char *p = *pp; 256 int c; 257 258 if ((c = *p++) == 't') 259 c = '\t'; 260 else if (c == 'n') 261 c = '\n'; 262 else if (c == 'f') 263 c = '\f'; 264 else if (c == 'r') 265 c = '\r'; 266 else if (c == 'b') 267 c = '\b'; 268 else if (c == '\\') 269 c = '\\'; 270 else if (c == 'x') { /* hexadecimal goo follows */ 271 c = hexstr(&p); /* this adds a null if number is invalid */ 272 } else if (isoctdigit(c)) { /* \d \dd \ddd */ 273 int n = c - '0'; 274 if (isoctdigit(*p)) { 275 n = 8 * n + *p++ - '0'; 276 if (isoctdigit(*p)) 277 n = 8 * n + *p++ - '0'; 278 } 279 c = n; 280 } /* else */ 281 /* c = c; */ 282 *pp = p; 283 return c; 284 } 285 286 char *cclenter(char *argp) /* add a character class */ 287 { 288 int i, c, c2; 289 uschar *p = (uschar *) argp; 290 uschar *op, *bp; 291 static uschar *buf = 0; 292 static int bufsz = 100; 293 294 op = p; 295 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 296 FATAL("out of space for character class [%.10s...] 1", p); 297 bp = buf; 298 for (i = 0; (c = *p++) != 0; ) { 299 if (c == '\\') { 300 c = quoted((char **) &p); 301 } else if (c == '-' && i > 0 && bp[-1] != 0) { 302 if (*p != 0) { 303 c = bp[-1]; 304 c2 = *p++; 305 if (c2 == '\\') 306 c2 = quoted((char **) &p); 307 if (c > c2) { /* empty; ignore */ 308 bp--; 309 i--; 310 continue; 311 } 312 while (c < c2) { 313 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) 314 FATAL("out of space for character class [%.10s...] 2", p); 315 *bp++ = ++c; 316 i++; 317 } 318 continue; 319 } 320 } 321 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) 322 FATAL("out of space for character class [%.10s...] 3", p); 323 *bp++ = c; 324 i++; 325 } 326 *bp = 0; 327 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); 328 xfree(op); 329 return (char *) tostring((char *) buf); 330 } 331 332 void overflo(char *s) 333 { 334 FATAL("regular expression too big: %.30s...", s); 335 } 336 337 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ 338 { 339 int i; 340 int *p; 341 342 switch (type(v)) { 343 LEAF 344 f->re[info(v)].ltype = type(v); 345 f->re[info(v)].lval.np = right(v); 346 while (f->accept >= maxsetvec) { /* guessing here! */ 347 maxsetvec *= 4; 348 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 349 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 350 if (setvec == 0 || tmpset == 0) 351 overflo("out of space in cfoll()"); 352 } 353 for (i = 0; i <= f->accept; i++) 354 setvec[i] = 0; 355 setcnt = 0; 356 follow(v); /* computes setvec and setcnt */ 357 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 358 overflo("out of space building follow set"); 359 f->re[info(v)].lfollow = p; 360 *p = setcnt; 361 for (i = f->accept; i >= 0; i--) 362 if (setvec[i] == 1) 363 *++p = i; 364 break; 365 UNARY 366 cfoll(f,left(v)); 367 break; 368 case CAT: 369 case OR: 370 cfoll(f,left(v)); 371 cfoll(f,right(v)); 372 break; 373 default: /* can't happen */ 374 FATAL("can't happen: unknown type %d in cfoll", type(v)); 375 } 376 } 377 378 int first(Node *p) /* collects initially active leaves of p into setvec */ 379 /* returns 1 if p matches empty string */ 380 { 381 int b, lp; 382 383 switch (type(p)) { 384 LEAF 385 lp = info(p); /* look for high-water mark of subscripts */ 386 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ 387 maxsetvec *= 4; 388 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 389 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 390 if (setvec == 0 || tmpset == 0) 391 overflo("out of space in first()"); 392 } 393 if (setvec[lp] != 1) { 394 setvec[lp] = 1; 395 setcnt++; 396 } 397 if (type(p) == CCL && (*(char *) right(p)) == '\0') 398 return(0); /* empty CCL */ 399 else return(1); 400 case PLUS: 401 if (first(left(p)) == 0) return(0); 402 return(1); 403 case STAR: 404 case QUEST: 405 first(left(p)); 406 return(0); 407 case CAT: 408 if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 409 return(1); 410 case OR: 411 b = first(right(p)); 412 if (first(left(p)) == 0 || b == 0) return(0); 413 return(1); 414 } 415 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ 416 return(-1); 417 } 418 419 void follow(Node *v) /* collects leaves that can follow v into setvec */ 420 { 421 Node *p; 422 423 if (type(v) == FINAL) 424 return; 425 p = parent(v); 426 switch (type(p)) { 427 case STAR: 428 case PLUS: 429 first(v); 430 follow(p); 431 return; 432 433 case OR: 434 case QUEST: 435 follow(p); 436 return; 437 438 case CAT: 439 if (v == left(p)) { /* v is left child of p */ 440 if (first(right(p)) == 0) { 441 follow(p); 442 return; 443 } 444 } else /* v is right child */ 445 follow(p); 446 return; 447 } 448 } 449 450 int member(int c, char *sarg) /* is c in s? */ 451 { 452 uschar *s = (uschar *) sarg; 453 454 while (*s) 455 if (c == *s++) 456 return(1); 457 return(0); 458 } 459 460 int match(fa *f, char *p0) /* shortest match ? */ 461 { 462 int s, ns; 463 uschar *p = (uschar *) p0; 464 465 s = f->reset ? makeinit(f,0) : f->initstat; 466 if (f->out[s]) 467 return(1); 468 do { 469 if ((ns = f->gototab[s][*p]) != 0) 470 s = ns; 471 else 472 s = cgoto(f, s, *p); 473 if (f->out[s]) 474 return(1); 475 } while (*p++ != 0); 476 return(0); 477 } 478 479 int pmatch(fa *f, char *p0) /* longest match, for sub */ 480 { 481 int s, ns; 482 uschar *p = (uschar *) p0; 483 uschar *q; 484 int i, k; 485 486 s = f->reset ? makeinit(f,1) : f->initstat; 487 patbeg = (char *) p; 488 patlen = -1; 489 do { 490 q = p; 491 do { 492 if (f->out[s]) /* final state */ 493 patlen = q-p; 494 if ((ns = f->gototab[s][*q]) != 0) 495 s = ns; 496 else 497 s = cgoto(f, s, *q); 498 if (s == 1) { /* no transition */ 499 if (patlen >= 0) { 500 patbeg = (char *) p; 501 return(1); 502 } 503 else 504 goto nextin; /* no match */ 505 } 506 } while (*q++ != 0); 507 if (f->out[s]) 508 patlen = q-p-1; /* don't count $ */ 509 if (patlen >= 0) { 510 patbeg = (char *) p; 511 return(1); 512 } 513 nextin: 514 s = 2; 515 if (f->reset) { 516 for (i = 2; i <= f->curstat; i++) 517 xfree(f->posns[i]); 518 k = *f->posns[0]; 519 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 520 overflo("out of space in pmatch"); 521 for (i = 0; i <= k; i++) 522 (f->posns[2])[i] = (f->posns[0])[i]; 523 f->initstat = f->curstat = 2; 524 f->out[2] = f->out[0]; 525 for (i = 0; i < NCHARS; i++) 526 f->gototab[2][i] = 0; 527 } 528 } while (*p++ != 0); 529 return (0); 530 } 531 532 int nematch(fa *f, char *p0) /* non-empty match, for sub */ 533 { 534 int s, ns; 535 uschar *p = (uschar *) p0; 536 uschar *q; 537 int i, k; 538 539 s = f->reset ? makeinit(f,1) : f->initstat; 540 patlen = -1; 541 while (*p) { 542 q = p; 543 do { 544 if (f->out[s]) /* final state */ 545 patlen = q-p; 546 if ((ns = f->gototab[s][*q]) != 0) 547 s = ns; 548 else 549 s = cgoto(f, s, *q); 550 if (s == 1) { /* no transition */ 551 if (patlen > 0) { 552 patbeg = (char *) p; 553 return(1); 554 } else 555 goto nnextin; /* no nonempty match */ 556 } 557 } while (*q++ != 0); 558 if (f->out[s]) 559 patlen = q-p-1; /* don't count $ */ 560 if (patlen > 0 ) { 561 patbeg = (char *) p; 562 return(1); 563 } 564 nnextin: 565 s = 2; 566 if (f->reset) { 567 for (i = 2; i <= f->curstat; i++) 568 xfree(f->posns[i]); 569 k = *f->posns[0]; 570 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 571 overflo("out of state space"); 572 for (i = 0; i <= k; i++) 573 (f->posns[2])[i] = (f->posns[0])[i]; 574 f->initstat = f->curstat = 2; 575 f->out[2] = f->out[0]; 576 for (i = 0; i < NCHARS; i++) 577 f->gototab[2][i] = 0; 578 } 579 p++; 580 } 581 return (0); 582 } 583 584 Node *reparse(char *p) /* parses regular expression pointed to by p */ 585 { /* uses relex() to scan regular expression */ 586 Node *np; 587 588 dprintf( ("reparse <%s>\n", p) ); 589 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ 590 rtok = relex(); 591 if (rtok == '\0') 592 FATAL("empty regular expression"); 593 np = regexp(); 594 if (rtok != '\0') 595 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 596 return(np); 597 } 598 599 Node *regexp(void) /* top-level parse of reg expr */ 600 { 601 return (alt(concat(primary()))); 602 } 603 604 Node *primary(void) 605 { 606 Node *np; 607 608 switch (rtok) { 609 case CHAR: 610 np = op2(CHAR, NIL, itonp(rlxval)); 611 rtok = relex(); 612 return (unary(np)); 613 case ALL: 614 rtok = relex(); 615 return (unary(op2(ALL, NIL, NIL))); 616 case DOT: 617 rtok = relex(); 618 return (unary(op2(DOT, NIL, NIL))); 619 case CCL: 620 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); 621 rtok = relex(); 622 return (unary(np)); 623 case NCCL: 624 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); 625 rtok = relex(); 626 return (unary(np)); 627 case '^': 628 rtok = relex(); 629 return (unary(op2(CHAR, NIL, itonp(HAT)))); 630 case '$': 631 rtok = relex(); 632 return (unary(op2(CHAR, NIL, NIL))); 633 case '(': 634 rtok = relex(); 635 if (rtok == ')') { /* special pleading for () */ 636 rtok = relex(); 637 return unary(op2(CCL, NIL, (Node *) tostring(""))); 638 } 639 np = regexp(); 640 if (rtok == ')') { 641 rtok = relex(); 642 return (unary(np)); 643 } 644 else 645 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 646 default: 647 FATAL("illegal primary in regular expression %s at %s", lastre, prestr); 648 } 649 return 0; /*NOTREACHED*/ 650 } 651 652 Node *concat(Node *np) 653 { 654 switch (rtok) { 655 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': 656 return (concat(op2(CAT, np, primary()))); 657 } 658 return (np); 659 } 660 661 Node *alt(Node *np) 662 { 663 if (rtok == OR) { 664 rtok = relex(); 665 return (alt(op2(OR, np, concat(primary())))); 666 } 667 return (np); 668 } 669 670 Node *unary(Node *np) 671 { 672 switch (rtok) { 673 case STAR: 674 rtok = relex(); 675 return (unary(op2(STAR, np, NIL))); 676 case PLUS: 677 rtok = relex(); 678 return (unary(op2(PLUS, np, NIL))); 679 case QUEST: 680 rtok = relex(); 681 return (unary(op2(QUEST, np, NIL))); 682 default: 683 return (np); 684 } 685 } 686 687 int relex(void) /* lexical analyzer for reparse */ 688 { 689 int c, n; 690 int cflag; 691 static uschar *buf = 0; 692 static int bufsz = 100; 693 uschar *bp; 694 695 switch (c = *prestr++) { 696 case '|': return OR; 697 case '*': return STAR; 698 case '+': return PLUS; 699 case '?': return QUEST; 700 case '.': return DOT; 701 case '\0': prestr--; return '\0'; 702 case '^': 703 case '$': 704 case '(': 705 case ')': 706 return c; 707 case '\\': 708 rlxval = quoted((char **) &prestr); 709 return CHAR; 710 default: 711 rlxval = c; 712 return CHAR; 713 case '[': 714 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 715 FATAL("out of space in reg expr %.10s..", lastre); 716 bp = buf; 717 if (*prestr == '^') { 718 cflag = 1; 719 prestr++; 720 } 721 else 722 cflag = 0; 723 n = 2 * strlen(prestr)+1; 724 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0)) 725 FATAL("out of space for reg expr %.10s...", lastre); 726 for (; ; ) { 727 if ((c = *prestr++) == '\\') { 728 *bp++ = '\\'; 729 if ((c = *prestr++) == '\0') 730 FATAL("nonterminated character class %.20s...", lastre); 731 *bp++ = c; 732 /* } else if (c == '\n') { */ 733 /* FATAL("newline in character class %.20s...", lastre); */ 734 } else if (c == '\0') { 735 FATAL("nonterminated character class %.20s", lastre); 736 } else if (bp == buf) { /* 1st char is special */ 737 *bp++ = c; 738 } else if (c == ']') { 739 *bp++ = 0; 740 rlxstr = (uschar *) tostring((char *) buf); 741 if (cflag == 0) 742 return CCL; 743 else 744 return NCCL; 745 } else 746 *bp++ = c; 747 } 748 } 749 } 750 751 int cgoto(fa *f, int s, int c) 752 { 753 int i, j, k; 754 int *p, *q; 755 756 if (c < 0 || c > 255) 757 FATAL("can't happen: neg char %d in cgoto", c); 758 while (f->accept >= maxsetvec) { /* guessing here! */ 759 maxsetvec *= 4; 760 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 761 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 762 if (setvec == 0 || tmpset == 0) 763 overflo("out of space in cgoto()"); 764 } 765 for (i = 0; i <= f->accept; i++) 766 setvec[i] = 0; 767 setcnt = 0; 768 /* compute positions of gototab[s,c] into setvec */ 769 p = f->posns[s]; 770 for (i = 1; i <= *p; i++) { 771 if ((k = f->re[p[i]].ltype) != FINAL) { 772 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) 773 || (k == DOT && c != 0 && c != HAT) 774 || (k == ALL && c != 0) 775 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) 776 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { 777 q = f->re[p[i]].lfollow; 778 for (j = 1; j <= *q; j++) { 779 if (q[j] >= maxsetvec) { 780 maxsetvec *= 4; 781 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 782 tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int)); 783 if (setvec == 0 || tmpset == 0) 784 overflo("cgoto overflow"); 785 } 786 if (setvec[q[j]] == 0) { 787 setcnt++; 788 setvec[q[j]] = 1; 789 } 790 } 791 } 792 } 793 } 794 /* determine if setvec is a previous state */ 795 tmpset[0] = setcnt; 796 j = 1; 797 for (i = f->accept; i >= 0; i--) 798 if (setvec[i]) { 799 tmpset[j++] = i; 800 } 801 /* tmpset == previous state? */ 802 for (i = 1; i <= f->curstat; i++) { 803 p = f->posns[i]; 804 if ((k = tmpset[0]) != p[0]) 805 goto different; 806 for (j = 1; j <= k; j++) 807 if (tmpset[j] != p[j]) 808 goto different; 809 /* setvec is state i */ 810 f->gototab[s][c] = i; 811 return i; 812 different:; 813 } 814 815 /* add tmpset to current set of states */ 816 if (f->curstat >= NSTATES-1) { 817 f->curstat = 2; 818 f->reset = 1; 819 for (i = 2; i < NSTATES; i++) 820 xfree(f->posns[i]); 821 } else 822 ++(f->curstat); 823 for (i = 0; i < NCHARS; i++) 824 f->gototab[f->curstat][i] = 0; 825 xfree(f->posns[f->curstat]); 826 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 827 overflo("out of space in cgoto"); 828 829 f->posns[f->curstat] = p; 830 f->gototab[s][c] = f->curstat; 831 for (i = 0; i <= setcnt; i++) 832 p[i] = tmpset[i]; 833 if (setvec[f->accept]) 834 f->out[f->curstat] = 1; 835 else 836 f->out[f->curstat] = 0; 837 return f->curstat; 838 } 839 840 841 void freefa(fa *f) /* free a finite automaton */ 842 { 843 int i; 844 845 if (f == NULL) 846 return; 847 for (i = 0; i <= f->curstat; i++) 848 xfree(f->posns[i]); 849 for (i = 0; i <= f->accept; i++) { 850 xfree(f->re[i].lfollow); 851 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) 852 xfree((f->re[i].lval.np)); 853 } 854 xfree(f->restr); 855 xfree(f); 856 } 857