1 /* $OpenBSD: b.c,v 1.9 2001/07/12 05:16:53 deraadt Exp $ */ 2 /**************************************************************** 3 Copyright (C) Lucent Technologies 1997 4 All Rights Reserved 5 6 Permission to use, copy, modify, and distribute this software and 7 its documentation for any purpose and without fee is hereby 8 granted, provided that the above copyright notice appear in all 9 copies and that both that the copyright notice and this 10 permission notice and warranty disclaimer appear in supporting 11 documentation, and that the name Lucent Technologies or any of 12 its entities not be used in advertising or publicity pertaining 13 to distribution of the software without specific, written prior 14 permission. 15 16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 23 THIS SOFTWARE. 24 ****************************************************************/ 25 26 /* lasciate ogne speranza, voi ch'entrate. */ 27 28 #define DEBUG 29 30 #include <ctype.h> 31 #include <stdio.h> 32 #include <string.h> 33 #include <stdlib.h> 34 #include "awk.h" 35 #include "ytab.h" 36 37 #define HAT (NCHARS-1) /* matches ^ in regular expr */ 38 /* NCHARS is 2**n */ 39 #define MAXLIN 22 40 41 #define type(v) (v)->nobj /* badly overloaded here */ 42 #define info(v) (v)->ntype /* badly overloaded here */ 43 #define left(v) (v)->narg[0] 44 #define right(v) (v)->narg[1] 45 #define parent(v) (v)->nnext 46 47 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: 48 #define UNARY case STAR: case PLUS: case QUEST: 49 50 /* encoding in tree Nodes: 51 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): 52 left is index, right contains value or pointer to value 53 unary (STAR, PLUS, QUEST): left is child, right is null 54 binary (CAT, OR): left and right are children 55 parent contains pointer to parent 56 */ 57 58 59 int *setvec; 60 int *tmpset; 61 int maxsetvec = 0; 62 63 int rtok; /* next token in current re */ 64 int rlxval; 65 char *rlxstr; 66 char *prestr; /* current position in current re */ 67 char *lastre; /* origin of last re */ 68 69 static int setcnt; 70 static int poscnt; 71 72 char *patbeg; 73 int patlen; 74 75 #define NFA 20 /* cache this many dynamic fa's */ 76 fa *fatab[NFA]; 77 int nfatab = 0; /* entries in fatab */ 78 79 fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */ 80 { 81 int i, use, nuse; 82 fa *pfa; 83 static int now = 1; 84 85 if (setvec == 0) { /* first time through any RE */ 86 maxsetvec = MAXLIN; 87 setvec = (int *) malloc(maxsetvec * sizeof(int)); 88 tmpset = (int *) malloc(maxsetvec * sizeof(int)); 89 if (setvec == 0 || tmpset == 0) 90 overflo("out of space initializing makedfa"); 91 } 92 93 if (compile_time) /* a constant for sure */ 94 return mkdfa(s, anchor); 95 for (i = 0; i < nfatab; i++) /* is it there already? */ 96 if (fatab[i]->anchor == anchor 97 && strcmp(fatab[i]->restr, s) == 0) { 98 fatab[i]->use = now++; 99 return fatab[i]; 100 } 101 pfa = mkdfa(s, anchor); 102 if (nfatab < NFA) { /* room for another */ 103 fatab[nfatab] = pfa; 104 fatab[nfatab]->use = now++; 105 nfatab++; 106 return pfa; 107 } 108 use = fatab[0]->use; /* replace least-recently used */ 109 nuse = 0; 110 for (i = 1; i < nfatab; i++) 111 if (fatab[i]->use < use) { 112 use = fatab[i]->use; 113 nuse = i; 114 } 115 freefa(fatab[nuse]); 116 fatab[nuse] = pfa; 117 pfa->use = now++; 118 return pfa; 119 } 120 121 fa *mkdfa(char *s, int anchor) /* does the real work of making a dfa */ 122 /* anchor = 1 for anchored matches, else 0 */ 123 { 124 Node *p, *p1; 125 fa *f; 126 127 p = reparse(s); 128 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); 129 /* put ALL STAR in front of reg. exp. */ 130 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); 131 /* put FINAL after reg. exp. */ 132 133 poscnt = 0; 134 penter(p1); /* enter parent pointers and leaf indices */ 135 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) 136 overflo("out of space for fa"); 137 f->accept = poscnt-1; /* penter has computed number of positions in re */ 138 cfoll(f, p1); /* set up follow sets */ 139 freetr(p1); 140 if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) 141 overflo("out of space in makedfa"); 142 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) 143 overflo("out of space in makedfa"); 144 *f->posns[1] = 0; 145 f->initstat = makeinit(f, anchor); 146 f->anchor = anchor; 147 f->restr = tostring(s); 148 return f; 149 } 150 151 int makeinit(fa *f, int anchor) 152 { 153 int i, k; 154 155 f->curstat = 2; 156 f->out[2] = 0; 157 f->reset = 0; 158 k = *(f->re[0].lfollow); 159 xfree(f->posns[2]); 160 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 161 overflo("out of space in makeinit"); 162 for (i=0; i <= k; i++) { 163 (f->posns[2])[i] = (f->re[0].lfollow)[i]; 164 } 165 if ((f->posns[2])[1] == f->accept) 166 f->out[2] = 1; 167 for (i=0; i < NCHARS; i++) 168 f->gototab[2][i] = 0; 169 f->curstat = cgoto(f, 2, HAT); 170 if (anchor) { 171 *f->posns[2] = k-1; /* leave out position 0 */ 172 for (i=0; i < k; i++) { 173 (f->posns[0])[i] = (f->posns[2])[i]; 174 } 175 176 f->out[0] = f->out[2]; 177 if (f->curstat != 2) 178 --(*f->posns[f->curstat]); 179 } 180 return f->curstat; 181 } 182 183 void penter(Node *p) /* set up parent pointers and leaf indices */ 184 { 185 switch (type(p)) { 186 LEAF 187 info(p) = poscnt; 188 poscnt++; 189 break; 190 UNARY 191 penter(left(p)); 192 parent(left(p)) = p; 193 break; 194 case CAT: 195 case OR: 196 penter(left(p)); 197 penter(right(p)); 198 parent(left(p)) = p; 199 parent(right(p)) = p; 200 break; 201 default: /* can't happen */ 202 FATAL("can't happen: unknown type %d in penter", type(p)); 203 break; 204 } 205 } 206 207 void freetr(Node *p) /* free parse tree */ 208 { 209 switch (type(p)) { 210 LEAF 211 xfree(p); 212 break; 213 UNARY 214 freetr(left(p)); 215 xfree(p); 216 break; 217 case CAT: 218 case OR: 219 freetr(left(p)); 220 freetr(right(p)); 221 xfree(p); 222 break; 223 default: /* can't happen */ 224 FATAL("can't happen: unknown type %d in freetr", type(p)); 225 break; 226 } 227 } 228 229 /* in the parsing of regular expressions, metacharacters like . have */ 230 /* to be seen literally; \056 is not a metacharacter. */ 231 232 int hexstr(char **pp) /* find and eval hex string at pp, return new p */ 233 { /* only pick up one 8-bit byte (2 chars) */ 234 char *p; 235 int n = 0; 236 int i; 237 238 for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) { 239 if (isdigit(*p)) 240 n = 16 * n + *p - '0'; 241 else if (*p >= 'a' && *p <= 'f') 242 n = 16 * n + *p - 'a' + 10; 243 else if (*p >= 'A' && *p <= 'F') 244 n = 16 * n + *p - 'A' + 10; 245 } 246 *pp = p; 247 return n; 248 } 249 250 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ 251 252 int quoted(char **pp) /* pick up next thing after a \\ */ 253 /* and increment *pp */ 254 { 255 char *p = *pp; 256 int c; 257 258 if ((c = *p++) == 't') 259 c = '\t'; 260 else if (c == 'n') 261 c = '\n'; 262 else if (c == 'f') 263 c = '\f'; 264 else if (c == 'r') 265 c = '\r'; 266 else if (c == 'b') 267 c = '\b'; 268 else if (c == '\\') 269 c = '\\'; 270 else if (c == 'x') { /* hexadecimal goo follows */ 271 c = hexstr(&p); /* this adds a null if number is invalid */ 272 } else if (isoctdigit(c)) { /* \d \dd \ddd */ 273 int n = c - '0'; 274 if (isoctdigit(*p)) { 275 n = 8 * n + *p++ - '0'; 276 if (isoctdigit(*p)) 277 n = 8 * n + *p++ - '0'; 278 } 279 c = n; 280 } /* else */ 281 /* c = c; */ 282 *pp = p; 283 return c; 284 } 285 286 char *cclenter(char *p) /* add a character class */ 287 { 288 int i, c, c2; 289 char *op, *bp; 290 static char *buf = 0; 291 static int bufsz = 100; 292 293 op = p; 294 if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) 295 FATAL("out of space for character class [%.10s...] 1", p); 296 bp = buf; 297 for (i = 0; (c = *p++) != 0; ) { 298 if (c == '\\') { 299 c = quoted(&p); 300 } else if (c == '-' && i > 0 && bp[-1] != 0) { 301 if (*p != 0) { 302 c = bp[-1]; 303 c2 = *p++; 304 if (c2 == '\\') 305 c2 = quoted(&p); 306 if (c > c2) { /* empty; ignore */ 307 bp--; 308 i--; 309 continue; 310 } 311 while (c < c2) { 312 if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, 0)) 313 FATAL("out of space for character class [%.10s...] 2", p); 314 *bp++ = ++c; 315 i++; 316 } 317 continue; 318 } 319 } 320 if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, 0)) 321 FATAL("out of space for character class [%.10s...] 3", p); 322 *bp++ = c; 323 i++; 324 } 325 *bp = 0; 326 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); 327 xfree(op); 328 return(tostring(buf)); 329 } 330 331 void overflo(char *s) 332 { 333 FATAL("regular expression too big: %.30s...", s); 334 } 335 336 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ 337 { 338 int i; 339 int *p; 340 341 switch (type(v)) { 342 LEAF 343 f->re[info(v)].ltype = type(v); 344 f->re[info(v)].lval.np = right(v); 345 while (f->accept >= maxsetvec) { /* guessing here! */ 346 maxsetvec *= 4; 347 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 348 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 349 if (setvec == 0 || tmpset == 0) 350 overflo("out of space in cfoll()"); 351 } 352 for (i = 0; i <= f->accept; i++) 353 setvec[i] = 0; 354 setcnt = 0; 355 follow(v); /* computes setvec and setcnt */ 356 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 357 overflo("out of space building follow set"); 358 f->re[info(v)].lfollow = p; 359 *p = setcnt; 360 for (i = f->accept; i >= 0; i--) 361 if (setvec[i] == 1) 362 *++p = i; 363 break; 364 UNARY 365 cfoll(f,left(v)); 366 break; 367 case CAT: 368 case OR: 369 cfoll(f,left(v)); 370 cfoll(f,right(v)); 371 break; 372 default: /* can't happen */ 373 FATAL("can't happen: unknown type %d in cfoll", type(v)); 374 } 375 } 376 377 int first(Node *p) /* collects initially active leaves of p into setvec */ 378 /* returns 1 if p matches empty string */ 379 { 380 int b, lp; 381 382 switch (type(p)) { 383 LEAF 384 lp = info(p); /* look for high-water mark of subscripts */ 385 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ 386 maxsetvec *= 4; 387 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 388 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 389 if (setvec == 0 || tmpset == 0) 390 overflo("out of space in first()"); 391 } 392 if (setvec[lp] != 1) { 393 setvec[lp] = 1; 394 setcnt++; 395 } 396 if (type(p) == CCL && (*(char *) right(p)) == '\0') 397 return(0); /* empty CCL */ 398 else return(1); 399 case PLUS: 400 if (first(left(p)) == 0) return(0); 401 return(1); 402 case STAR: 403 case QUEST: 404 first(left(p)); 405 return(0); 406 case CAT: 407 if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 408 return(1); 409 case OR: 410 b = first(right(p)); 411 if (first(left(p)) == 0 || b == 0) return(0); 412 return(1); 413 } 414 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ 415 return(-1); 416 } 417 418 void follow(Node *v) /* collects leaves that can follow v into setvec */ 419 { 420 Node *p; 421 422 if (type(v) == FINAL) 423 return; 424 p = parent(v); 425 switch (type(p)) { 426 case STAR: 427 case PLUS: 428 first(v); 429 follow(p); 430 return; 431 432 case OR: 433 case QUEST: 434 follow(p); 435 return; 436 437 case CAT: 438 if (v == left(p)) { /* v is left child of p */ 439 if (first(right(p)) == 0) { 440 follow(p); 441 return; 442 } 443 } else /* v is right child */ 444 follow(p); 445 return; 446 } 447 } 448 449 int member(int c, char *s) /* is c in s? */ 450 { 451 while (*s) 452 if (c == *s++) 453 return(1); 454 return(0); 455 } 456 457 int match(fa *f, char *p0) /* shortest match ? */ 458 { 459 int s, ns; 460 uschar *p = (uschar *) p0; 461 462 s = f->reset ? makeinit(f,0) : f->initstat; 463 if (f->out[s]) 464 return(1); 465 do { 466 if ((ns = f->gototab[s][*p]) != 0) 467 s = ns; 468 else 469 s = cgoto(f, s, *p); 470 if (f->out[s]) 471 return(1); 472 } while (*p++ != 0); 473 return(0); 474 } 475 476 int pmatch(fa *f, char *p0) /* longest match, for sub */ 477 { 478 int s, ns; 479 uschar *p = (uschar *) p0; 480 uschar *q; 481 int i, k; 482 483 s = f->reset ? makeinit(f,1) : f->initstat; 484 patbeg = (char *) p; 485 patlen = -1; 486 do { 487 q = p; 488 do { 489 if (f->out[s]) /* final state */ 490 patlen = q-p; 491 if ((ns = f->gototab[s][*q]) != 0) 492 s = ns; 493 else 494 s = cgoto(f, s, *q); 495 if (s == 1) { /* no transition */ 496 if (patlen >= 0) { 497 patbeg = (char *) p; 498 return(1); 499 } 500 else 501 goto nextin; /* no match */ 502 } 503 } while (*q++ != 0); 504 if (f->out[s]) 505 patlen = q-p-1; /* don't count $ */ 506 if (patlen >= 0) { 507 patbeg = (char *) p; 508 return(1); 509 } 510 nextin: 511 s = 2; 512 if (f->reset) { 513 for (i = 2; i <= f->curstat; i++) 514 xfree(f->posns[i]); 515 k = *f->posns[0]; 516 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 517 overflo("out of space in pmatch"); 518 for (i = 0; i <= k; i++) 519 (f->posns[2])[i] = (f->posns[0])[i]; 520 f->initstat = f->curstat = 2; 521 f->out[2] = f->out[0]; 522 for (i = 0; i < NCHARS; i++) 523 f->gototab[2][i] = 0; 524 } 525 } while (*p++ != 0); 526 return (0); 527 } 528 529 int nematch(fa *f, char *p0) /* non-empty match, for sub */ 530 { 531 int s, ns; 532 uschar *p = (uschar *) p0; 533 uschar *q; 534 int i, k; 535 536 s = f->reset ? makeinit(f,1) : f->initstat; 537 patlen = -1; 538 while (*p) { 539 q = p; 540 do { 541 if (f->out[s]) /* final state */ 542 patlen = q-p; 543 if ((ns = f->gototab[s][*q]) != 0) 544 s = ns; 545 else 546 s = cgoto(f, s, *q); 547 if (s == 1) { /* no transition */ 548 if (patlen > 0) { 549 patbeg = (char *) p; 550 return(1); 551 } else 552 goto nnextin; /* no nonempty match */ 553 } 554 } while (*q++ != 0); 555 if (f->out[s]) 556 patlen = q-p-1; /* don't count $ */ 557 if (patlen > 0 ) { 558 patbeg = (char *) p; 559 return(1); 560 } 561 nnextin: 562 s = 2; 563 if (f->reset) { 564 for (i = 2; i <= f->curstat; i++) 565 xfree(f->posns[i]); 566 k = *f->posns[0]; 567 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 568 overflo("out of state space"); 569 for (i = 0; i <= k; i++) 570 (f->posns[2])[i] = (f->posns[0])[i]; 571 f->initstat = f->curstat = 2; 572 f->out[2] = f->out[0]; 573 for (i = 0; i < NCHARS; i++) 574 f->gototab[2][i] = 0; 575 } 576 p++; 577 } 578 return (0); 579 } 580 581 Node *reparse(char *p) /* parses regular expression pointed to by p */ 582 { /* uses relex() to scan regular expression */ 583 Node *np; 584 585 dprintf( ("reparse <%s>\n", p) ); 586 lastre = prestr = p; /* prestr points to string to be parsed */ 587 rtok = relex(); 588 if (rtok == '\0') 589 FATAL("empty regular expression"); 590 np = regexp(); 591 if (rtok != '\0') 592 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 593 return(np); 594 } 595 596 Node *regexp(void) /* top-level parse of reg expr */ 597 { 598 return (alt(concat(primary()))); 599 } 600 601 Node *primary(void) 602 { 603 Node *np; 604 605 switch (rtok) { 606 case CHAR: 607 np = op2(CHAR, NIL, itonp(rlxval)); 608 rtok = relex(); 609 return (unary(np)); 610 case ALL: 611 rtok = relex(); 612 return (unary(op2(ALL, NIL, NIL))); 613 case DOT: 614 rtok = relex(); 615 return (unary(op2(DOT, NIL, NIL))); 616 case CCL: 617 np = op2(CCL, NIL, (Node*) cclenter(rlxstr)); 618 rtok = relex(); 619 return (unary(np)); 620 case NCCL: 621 np = op2(NCCL, NIL, (Node *) cclenter(rlxstr)); 622 rtok = relex(); 623 return (unary(np)); 624 case '^': 625 rtok = relex(); 626 return (unary(op2(CHAR, NIL, itonp(HAT)))); 627 case '$': 628 rtok = relex(); 629 return (unary(op2(CHAR, NIL, NIL))); 630 case '(': 631 rtok = relex(); 632 if (rtok == ')') { /* special pleading for () */ 633 rtok = relex(); 634 return unary(op2(CCL, NIL, (Node *) tostring(""))); 635 } 636 np = regexp(); 637 if (rtok == ')') { 638 rtok = relex(); 639 return (unary(np)); 640 } 641 else 642 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 643 default: 644 FATAL("illegal primary in regular expression %s at %s", lastre, prestr); 645 } 646 return 0; /*NOTREACHED*/ 647 } 648 649 Node *concat(Node *np) 650 { 651 switch (rtok) { 652 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': 653 return (concat(op2(CAT, np, primary()))); 654 } 655 return (np); 656 } 657 658 Node *alt(Node *np) 659 { 660 if (rtok == OR) { 661 rtok = relex(); 662 return (alt(op2(OR, np, concat(primary())))); 663 } 664 return (np); 665 } 666 667 Node *unary(Node *np) 668 { 669 switch (rtok) { 670 case STAR: 671 rtok = relex(); 672 return (unary(op2(STAR, np, NIL))); 673 case PLUS: 674 rtok = relex(); 675 return (unary(op2(PLUS, np, NIL))); 676 case QUEST: 677 rtok = relex(); 678 return (unary(op2(QUEST, np, NIL))); 679 default: 680 return (np); 681 } 682 } 683 684 int relex(void) /* lexical analyzer for reparse */ 685 { 686 int c, n; 687 int cflag; 688 static char *buf = 0; 689 static int bufsz = 100; 690 char *bp; 691 692 switch (c = *prestr++) { 693 case '|': return OR; 694 case '*': return STAR; 695 case '+': return PLUS; 696 case '?': return QUEST; 697 case '.': return DOT; 698 case '\0': prestr--; return '\0'; 699 case '^': 700 case '$': 701 case '(': 702 case ')': 703 return c; 704 case '\\': 705 rlxval = quoted(&prestr); 706 return CHAR; 707 default: 708 rlxval = c; 709 return CHAR; 710 case '[': 711 if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) 712 FATAL("out of space in reg expr %.10s..", lastre); 713 bp = buf; 714 if (*prestr == '^') { 715 cflag = 1; 716 prestr++; 717 } 718 else 719 cflag = 0; 720 n = 2 * strlen(prestr)+1; 721 if (!adjbuf(&buf, &bufsz, n, n, &bp, 0)) 722 FATAL("out of space for reg expr %.10s...", lastre); 723 for (; ; ) { 724 if ((c = *prestr++) == '\\') { 725 *bp++ = '\\'; 726 if ((c = *prestr++) == '\0') 727 FATAL("nonterminated character class %.20s...", lastre); 728 *bp++ = c; 729 } else if (c == '\n') { 730 FATAL("newline in character class %.20s...", lastre); 731 } else if (c == '\0') { 732 FATAL("nonterminated character class %.20s", lastre); 733 } else if (bp == buf) { /* 1st char is special */ 734 *bp++ = c; 735 } else if (c == ']') { 736 *bp++ = 0; 737 rlxstr = tostring(buf); 738 if (cflag == 0) 739 return CCL; 740 else 741 return NCCL; 742 } else 743 *bp++ = c; 744 } 745 } 746 } 747 748 int cgoto(fa *f, int s, int c) 749 { 750 int i, j, k; 751 int *p, *q; 752 753 if (c < 0) 754 FATAL("can't happen: neg char %d in cgoto", c); 755 while (f->accept >= maxsetvec) { /* guessing here! */ 756 maxsetvec *= 4; 757 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 758 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 759 if (setvec == 0 || tmpset == 0) 760 overflo("out of space in cgoto()"); 761 } 762 for (i = 0; i <= f->accept; i++) 763 setvec[i] = 0; 764 setcnt = 0; 765 /* compute positions of gototab[s,c] into setvec */ 766 p = f->posns[s]; 767 for (i = 1; i <= *p; i++) { 768 if ((k = f->re[p[i]].ltype) != FINAL) { 769 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) 770 || (k == DOT && c != 0 && c != HAT) 771 || (k == ALL && c != 0) 772 || (k == CCL && member(c, f->re[p[i]].lval.up)) 773 || (k == NCCL && !member(c, f->re[p[i]].lval.up) && c != 0 && c != HAT)) { 774 q = f->re[p[i]].lfollow; 775 for (j = 1; j <= *q; j++) { 776 if (q[j] >= maxsetvec) { 777 maxsetvec *= 4; 778 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 779 tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int)); 780 if (setvec == 0 || tmpset == 0) 781 overflo("cgoto overflow"); 782 } 783 if (setvec[q[j]] == 0) { 784 setcnt++; 785 setvec[q[j]] = 1; 786 } 787 } 788 } 789 } 790 } 791 /* determine if setvec is a previous state */ 792 tmpset[0] = setcnt; 793 j = 1; 794 for (i = f->accept; i >= 0; i--) 795 if (setvec[i]) { 796 tmpset[j++] = i; 797 } 798 /* tmpset == previous state? */ 799 for (i = 1; i <= f->curstat; i++) { 800 p = f->posns[i]; 801 if ((k = tmpset[0]) != p[0]) 802 goto different; 803 for (j = 1; j <= k; j++) 804 if (tmpset[j] != p[j]) 805 goto different; 806 /* setvec is state i */ 807 f->gototab[s][c] = i; 808 return i; 809 different:; 810 } 811 812 /* add tmpset to current set of states */ 813 if (f->curstat >= NSTATES-1) { 814 f->curstat = 2; 815 f->reset = 1; 816 for (i = 2; i < NSTATES; i++) 817 xfree(f->posns[i]); 818 } else 819 ++(f->curstat); 820 for (i = 0; i < NCHARS; i++) 821 f->gototab[f->curstat][i] = 0; 822 xfree(f->posns[f->curstat]); 823 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 824 overflo("out of space in cgoto"); 825 826 f->posns[f->curstat] = p; 827 f->gototab[s][c] = f->curstat; 828 for (i = 0; i <= setcnt; i++) 829 p[i] = tmpset[i]; 830 if (setvec[f->accept]) 831 f->out[f->curstat] = 1; 832 else 833 f->out[f->curstat] = 0; 834 return f->curstat; 835 } 836 837 838 void freefa(fa *f) /* free a finite automaton */ 839 { 840 int i; 841 842 if (f == NULL) 843 return; 844 for (i = 0; i <= f->curstat; i++) 845 xfree(f->posns[i]); 846 for (i = 0; i <= f->accept; i++) { 847 xfree(f->re[i].lfollow); 848 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) 849 xfree((f->re[i].lval.np)); 850 } 851 xfree(f->restr); 852 xfree(f); 853 } 854