1 /* $OpenBSD: b.c,v 1.20 2018/01/24 16:28:25 millert Exp $ */ 2 /**************************************************************** 3 Copyright (C) Lucent Technologies 1997 4 All Rights Reserved 5 6 Permission to use, copy, modify, and distribute this software and 7 its documentation for any purpose and without fee is hereby 8 granted, provided that the above copyright notice appear in all 9 copies and that both that the copyright notice and this 10 permission notice and warranty disclaimer appear in supporting 11 documentation, and that the name Lucent Technologies or any of 12 its entities not be used in advertising or publicity pertaining 13 to distribution of the software without specific, written prior 14 permission. 15 16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 23 THIS SOFTWARE. 24 ****************************************************************/ 25 26 /* lasciate ogne speranza, voi ch'intrate. */ 27 28 #define DEBUG 29 30 #include <ctype.h> 31 #include <stdio.h> 32 #include <string.h> 33 #include <stdlib.h> 34 #include "awk.h" 35 #include "ytab.h" 36 37 #define HAT (NCHARS+2) /* matches ^ in regular expr */ 38 /* NCHARS is 2**n */ 39 #define MAXLIN 22 40 41 #define type(v) (v)->nobj /* badly overloaded here */ 42 #define info(v) (v)->ntype /* badly overloaded here */ 43 #define left(v) (v)->narg[0] 44 #define right(v) (v)->narg[1] 45 #define parent(v) (v)->nnext 46 47 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: 48 #define ELEAF case EMPTYRE: /* empty string in regexp */ 49 #define UNARY case STAR: case PLUS: case QUEST: 50 51 /* encoding in tree Nodes: 52 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE): 53 left is index, right contains value or pointer to value 54 unary (STAR, PLUS, QUEST): left is child, right is null 55 binary (CAT, OR): left and right are children 56 parent contains pointer to parent 57 */ 58 59 60 int *setvec; 61 int *tmpset; 62 int maxsetvec = 0; 63 64 int rtok; /* next token in current re */ 65 int rlxval; 66 static uschar *rlxstr; 67 static uschar *prestr; /* current position in current re */ 68 static uschar *lastre; /* origin of last re */ 69 70 static int setcnt; 71 static int poscnt; 72 73 char *patbeg; 74 int patlen; 75 76 #define NFA 20 /* cache this many dynamic fa's */ 77 fa *fatab[NFA]; 78 int nfatab = 0; /* entries in fatab */ 79 80 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ 81 { 82 int i, use, nuse; 83 fa *pfa; 84 static int now = 1; 85 86 if (setvec == 0) { /* first time through any RE */ 87 maxsetvec = MAXLIN; 88 setvec = (int *) calloc(maxsetvec, sizeof(int)); 89 tmpset = (int *) calloc(maxsetvec, sizeof(int)); 90 if (setvec == 0 || tmpset == 0) 91 overflo("out of space initializing makedfa"); 92 } 93 94 if (compile_time) /* a constant for sure */ 95 return mkdfa(s, anchor); 96 for (i = 0; i < nfatab; i++) /* is it there already? */ 97 if (fatab[i]->anchor == anchor 98 && strcmp((const char *) fatab[i]->restr, s) == 0) { 99 fatab[i]->use = now++; 100 return fatab[i]; 101 } 102 pfa = mkdfa(s, anchor); 103 if (nfatab < NFA) { /* room for another */ 104 fatab[nfatab] = pfa; 105 fatab[nfatab]->use = now++; 106 nfatab++; 107 return pfa; 108 } 109 use = fatab[0]->use; /* replace least-recently used */ 110 nuse = 0; 111 for (i = 1; i < nfatab; i++) 112 if (fatab[i]->use < use) { 113 use = fatab[i]->use; 114 nuse = i; 115 } 116 freefa(fatab[nuse]); 117 fatab[nuse] = pfa; 118 pfa->use = now++; 119 return pfa; 120 } 121 122 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ 123 /* anchor = 1 for anchored matches, else 0 */ 124 { 125 Node *p, *p1; 126 fa *f; 127 128 p = reparse(s); 129 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); 130 /* put ALL STAR in front of reg. exp. */ 131 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); 132 /* put FINAL after reg. exp. */ 133 134 poscnt = 0; 135 penter(p1); /* enter parent pointers and leaf indices */ 136 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) 137 overflo("out of space for fa"); 138 f->accept = poscnt-1; /* penter has computed number of positions in re */ 139 cfoll(f, p1); /* set up follow sets */ 140 freetr(p1); 141 if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL) 142 overflo("out of space in makedfa"); 143 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) 144 overflo("out of space in makedfa"); 145 *f->posns[1] = 0; 146 f->initstat = makeinit(f, anchor); 147 f->anchor = anchor; 148 f->restr = (uschar *) tostring(s); 149 return f; 150 } 151 152 int makeinit(fa *f, int anchor) 153 { 154 int i, k; 155 156 f->curstat = 2; 157 f->out[2] = 0; 158 f->reset = 0; 159 k = *(f->re[0].lfollow); 160 xfree(f->posns[2]); 161 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) 162 overflo("out of space in makeinit"); 163 for (i=0; i <= k; i++) { 164 (f->posns[2])[i] = (f->re[0].lfollow)[i]; 165 } 166 if ((f->posns[2])[1] == f->accept) 167 f->out[2] = 1; 168 for (i=0; i < NCHARS; i++) 169 f->gototab[2][i] = 0; 170 f->curstat = cgoto(f, 2, HAT); 171 if (anchor) { 172 *f->posns[2] = k-1; /* leave out position 0 */ 173 for (i=0; i < k; i++) { 174 (f->posns[0])[i] = (f->posns[2])[i]; 175 } 176 177 f->out[0] = f->out[2]; 178 if (f->curstat != 2) 179 --(*f->posns[f->curstat]); 180 } 181 return f->curstat; 182 } 183 184 void penter(Node *p) /* set up parent pointers and leaf indices */ 185 { 186 switch (type(p)) { 187 ELEAF 188 LEAF 189 info(p) = poscnt; 190 poscnt++; 191 break; 192 UNARY 193 penter(left(p)); 194 parent(left(p)) = p; 195 break; 196 case CAT: 197 case OR: 198 penter(left(p)); 199 penter(right(p)); 200 parent(left(p)) = p; 201 parent(right(p)) = p; 202 break; 203 default: /* can't happen */ 204 FATAL("can't happen: unknown type %d in penter", type(p)); 205 break; 206 } 207 } 208 209 void freetr(Node *p) /* free parse tree */ 210 { 211 switch (type(p)) { 212 ELEAF 213 LEAF 214 xfree(p); 215 break; 216 UNARY 217 freetr(left(p)); 218 xfree(p); 219 break; 220 case CAT: 221 case OR: 222 freetr(left(p)); 223 freetr(right(p)); 224 xfree(p); 225 break; 226 default: /* can't happen */ 227 FATAL("can't happen: unknown type %d in freetr", type(p)); 228 break; 229 } 230 } 231 232 /* in the parsing of regular expressions, metacharacters like . have */ 233 /* to be seen literally; \056 is not a metacharacter. */ 234 235 int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */ 236 { /* only pick up one 8-bit byte (2 chars) */ 237 uschar *p; 238 int n = 0; 239 int i; 240 241 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { 242 if (isdigit(*p)) 243 n = 16 * n + *p - '0'; 244 else if (*p >= 'a' && *p <= 'f') 245 n = 16 * n + *p - 'a' + 10; 246 else if (*p >= 'A' && *p <= 'F') 247 n = 16 * n + *p - 'A' + 10; 248 } 249 *pp = (uschar *) p; 250 return n; 251 } 252 253 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ 254 255 int quoted(uschar **pp) /* pick up next thing after a \\ */ 256 /* and increment *pp */ 257 { 258 uschar *p = *pp; 259 int c; 260 261 if ((c = *p++) == 't') 262 c = '\t'; 263 else if (c == 'v') 264 c = '\v'; 265 else if (c == 'n') 266 c = '\n'; 267 else if (c == 'f') 268 c = '\f'; 269 else if (c == 'r') 270 c = '\r'; 271 else if (c == 'b') 272 c = '\b'; 273 else if (c == 'a') 274 c = '\007'; 275 else if (c == '\\') 276 c = '\\'; 277 else if (c == 'x') { /* hexadecimal goo follows */ 278 c = hexstr(&p); /* this adds a null if number is invalid */ 279 } else if (isoctdigit(c)) { /* \d \dd \ddd */ 280 int n = c - '0'; 281 if (isoctdigit(*p)) { 282 n = 8 * n + *p++ - '0'; 283 if (isoctdigit(*p)) 284 n = 8 * n + *p++ - '0'; 285 } 286 c = n; 287 } /* else */ 288 /* c = c; */ 289 *pp = p; 290 return c; 291 } 292 293 char *cclenter(const char *argp) /* add a character class */ 294 { 295 int i, c, c2; 296 uschar *p = (uschar *) argp; 297 uschar *op, *bp; 298 static uschar *buf = 0; 299 static int bufsz = 100; 300 301 op = p; 302 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 303 FATAL("out of space for character class [%.10s...] 1", p); 304 bp = buf; 305 for (i = 0; (c = *p++) != 0; ) { 306 if (c == '\\') { 307 c = quoted(&p); 308 } else if (c == '-' && i > 0 && bp[-1] != 0) { 309 if (*p != 0) { 310 c = bp[-1]; 311 c2 = *p++; 312 if (c2 == '\\') 313 c2 = quoted(&p); 314 if (c > c2) { /* empty; ignore */ 315 bp--; 316 i--; 317 continue; 318 } 319 while (c < c2) { 320 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1")) 321 FATAL("out of space for character class [%.10s...] 2", p); 322 *bp++ = ++c; 323 i++; 324 } 325 continue; 326 } 327 } 328 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2")) 329 FATAL("out of space for character class [%.10s...] 3", p); 330 *bp++ = c; 331 i++; 332 } 333 *bp = 0; 334 DPRINTF( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); 335 xfree(op); 336 return (char *) tostring((char *) buf); 337 } 338 339 void overflo(const char *s) 340 { 341 FATAL("regular expression too big: %.30s...", s); 342 } 343 344 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ 345 { 346 int i; 347 int *p; 348 349 switch (type(v)) { 350 ELEAF 351 LEAF 352 f->re[info(v)].ltype = type(v); 353 f->re[info(v)].lval.np = right(v); 354 while (f->accept >= maxsetvec) { /* guessing here! */ 355 setvec = reallocarray(setvec, maxsetvec, 356 4 * sizeof(int)); 357 tmpset = reallocarray(tmpset, maxsetvec, 358 4 * sizeof(int)); 359 if (setvec == 0 || tmpset == 0) 360 overflo("out of space in cfoll()"); 361 maxsetvec *= 4; 362 } 363 for (i = 0; i <= f->accept; i++) 364 setvec[i] = 0; 365 setcnt = 0; 366 follow(v); /* computes setvec and setcnt */ 367 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) 368 overflo("out of space building follow set"); 369 f->re[info(v)].lfollow = p; 370 *p = setcnt; 371 for (i = f->accept; i >= 0; i--) 372 if (setvec[i] == 1) 373 *++p = i; 374 break; 375 UNARY 376 cfoll(f,left(v)); 377 break; 378 case CAT: 379 case OR: 380 cfoll(f,left(v)); 381 cfoll(f,right(v)); 382 break; 383 default: /* can't happen */ 384 FATAL("can't happen: unknown type %d in cfoll", type(v)); 385 } 386 } 387 388 int first(Node *p) /* collects initially active leaves of p into setvec */ 389 /* returns 0 if p matches empty string */ 390 { 391 int b, lp; 392 393 switch (type(p)) { 394 ELEAF 395 LEAF 396 lp = info(p); /* look for high-water mark of subscripts */ 397 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ 398 setvec = reallocarray(setvec, maxsetvec, 399 4 * sizeof(int)); 400 tmpset = reallocarray(tmpset, maxsetvec, 401 4 * sizeof(int)); 402 if (setvec == 0 || tmpset == 0) 403 overflo("out of space in first()"); 404 maxsetvec *= 4; 405 } 406 if (type(p) == EMPTYRE) { 407 setvec[lp] = 0; 408 return(0); 409 } 410 if (setvec[lp] != 1) { 411 setvec[lp] = 1; 412 setcnt++; 413 } 414 if (type(p) == CCL && (*(char *) right(p)) == '\0') 415 return(0); /* empty CCL */ 416 else return(1); 417 case PLUS: 418 if (first(left(p)) == 0) return(0); 419 return(1); 420 case STAR: 421 case QUEST: 422 first(left(p)); 423 return(0); 424 case CAT: 425 if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 426 return(1); 427 case OR: 428 b = first(right(p)); 429 if (first(left(p)) == 0 || b == 0) return(0); 430 return(1); 431 } 432 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ 433 return(-1); 434 } 435 436 void follow(Node *v) /* collects leaves that can follow v into setvec */ 437 { 438 Node *p; 439 440 if (type(v) == FINAL) 441 return; 442 p = parent(v); 443 switch (type(p)) { 444 case STAR: 445 case PLUS: 446 first(v); 447 follow(p); 448 return; 449 450 case OR: 451 case QUEST: 452 follow(p); 453 return; 454 455 case CAT: 456 if (v == left(p)) { /* v is left child of p */ 457 if (first(right(p)) == 0) { 458 follow(p); 459 return; 460 } 461 } else /* v is right child */ 462 follow(p); 463 return; 464 } 465 } 466 467 int member(int c, const char *sarg) /* is c in s? */ 468 { 469 uschar *s = (uschar *) sarg; 470 471 while (*s) 472 if (c == *s++) 473 return(1); 474 return(0); 475 } 476 477 int match(fa *f, const char *p0) /* shortest match ? */ 478 { 479 int s, ns; 480 uschar *p = (uschar *) p0; 481 482 s = f->reset ? makeinit(f,0) : f->initstat; 483 if (f->out[s]) 484 return(1); 485 do { 486 /* assert(*p < NCHARS); */ 487 if ((ns = f->gototab[s][*p]) != 0) 488 s = ns; 489 else 490 s = cgoto(f, s, *p); 491 if (f->out[s]) 492 return(1); 493 } while (*p++ != 0); 494 return(0); 495 } 496 497 int pmatch(fa *f, const char *p0) /* longest match, for sub */ 498 { 499 int s, ns; 500 uschar *p = (uschar *) p0; 501 uschar *q; 502 int i, k; 503 504 /* s = f->reset ? makeinit(f,1) : f->initstat; */ 505 if (f->reset) { 506 f->initstat = s = makeinit(f,1); 507 } else { 508 s = f->initstat; 509 } 510 patbeg = (char *) p; 511 patlen = -1; 512 do { 513 q = p; 514 do { 515 if (f->out[s]) /* final state */ 516 patlen = q-p; 517 /* assert(*q < NCHARS); */ 518 if ((ns = f->gototab[s][*q]) != 0) 519 s = ns; 520 else 521 s = cgoto(f, s, *q); 522 if (s == 1) { /* no transition */ 523 if (patlen >= 0) { 524 patbeg = (char *) p; 525 return(1); 526 } 527 else 528 goto nextin; /* no match */ 529 } 530 } while (*q++ != 0); 531 if (f->out[s]) 532 patlen = q-p-1; /* don't count $ */ 533 if (patlen >= 0) { 534 patbeg = (char *) p; 535 return(1); 536 } 537 nextin: 538 s = 2; 539 if (f->reset) { 540 for (i = 2; i <= f->curstat; i++) 541 xfree(f->posns[i]); 542 k = *f->posns[0]; 543 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) 544 overflo("out of space in pmatch"); 545 for (i = 0; i <= k; i++) 546 (f->posns[2])[i] = (f->posns[0])[i]; 547 f->initstat = f->curstat = 2; 548 f->out[2] = f->out[0]; 549 for (i = 0; i < NCHARS; i++) 550 f->gototab[2][i] = 0; 551 } 552 } while (*p++ != 0); 553 return (0); 554 } 555 556 int nematch(fa *f, const char *p0) /* non-empty match, for sub */ 557 { 558 int s, ns; 559 uschar *p = (uschar *) p0; 560 uschar *q; 561 int i, k; 562 563 /* s = f->reset ? makeinit(f,1) : f->initstat; */ 564 if (f->reset) { 565 f->initstat = s = makeinit(f,1); 566 } else { 567 s = f->initstat; 568 } 569 patlen = -1; 570 while (*p) { 571 q = p; 572 do { 573 if (f->out[s]) /* final state */ 574 patlen = q-p; 575 /* assert(*q < NCHARS); */ 576 if ((ns = f->gototab[s][*q]) != 0) 577 s = ns; 578 else 579 s = cgoto(f, s, *q); 580 if (s == 1) { /* no transition */ 581 if (patlen > 0) { 582 patbeg = (char *) p; 583 return(1); 584 } else 585 goto nnextin; /* no nonempty match */ 586 } 587 } while (*q++ != 0); 588 if (f->out[s]) 589 patlen = q-p-1; /* don't count $ */ 590 if (patlen > 0 ) { 591 patbeg = (char *) p; 592 return(1); 593 } 594 nnextin: 595 s = 2; 596 if (f->reset) { 597 for (i = 2; i <= f->curstat; i++) 598 xfree(f->posns[i]); 599 k = *f->posns[0]; 600 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) 601 overflo("out of state space"); 602 for (i = 0; i <= k; i++) 603 (f->posns[2])[i] = (f->posns[0])[i]; 604 f->initstat = f->curstat = 2; 605 f->out[2] = f->out[0]; 606 for (i = 0; i < NCHARS; i++) 607 f->gototab[2][i] = 0; 608 } 609 p++; 610 } 611 return (0); 612 } 613 614 Node *reparse(const char *p) /* parses regular expression pointed to by p */ 615 { /* uses relex() to scan regular expression */ 616 Node *np; 617 618 DPRINTF( ("reparse <%s>\n", p) ); 619 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ 620 rtok = relex(); 621 /* GNU compatibility: an empty regexp matches anything */ 622 if (rtok == '\0') { 623 /* FATAL("empty regular expression"); previous */ 624 return(op2(EMPTYRE, NIL, NIL)); 625 } 626 np = regexp(); 627 if (rtok != '\0') 628 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 629 return(np); 630 } 631 632 Node *regexp(void) /* top-level parse of reg expr */ 633 { 634 return (alt(concat(primary()))); 635 } 636 637 Node *primary(void) 638 { 639 Node *np; 640 641 switch (rtok) { 642 case CHAR: 643 np = op2(CHAR, NIL, itonp(rlxval)); 644 rtok = relex(); 645 return (unary(np)); 646 case ALL: 647 rtok = relex(); 648 return (unary(op2(ALL, NIL, NIL))); 649 case EMPTYRE: 650 rtok = relex(); 651 return (unary(op2(ALL, NIL, NIL))); 652 case DOT: 653 rtok = relex(); 654 return (unary(op2(DOT, NIL, NIL))); 655 case CCL: 656 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); 657 rtok = relex(); 658 return (unary(np)); 659 case NCCL: 660 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); 661 rtok = relex(); 662 return (unary(np)); 663 case '^': 664 rtok = relex(); 665 return (unary(op2(CHAR, NIL, itonp(HAT)))); 666 case '$': 667 rtok = relex(); 668 return (unary(op2(CHAR, NIL, NIL))); 669 case '(': 670 rtok = relex(); 671 if (rtok == ')') { /* special pleading for () */ 672 rtok = relex(); 673 return unary(op2(CCL, NIL, (Node *) tostring(""))); 674 } 675 np = regexp(); 676 if (rtok == ')') { 677 rtok = relex(); 678 return (unary(np)); 679 } 680 else 681 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 682 default: 683 FATAL("illegal primary in regular expression %s at %s", lastre, prestr); 684 } 685 return 0; /*NOTREACHED*/ 686 } 687 688 Node *concat(Node *np) 689 { 690 switch (rtok) { 691 case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(': 692 return (concat(op2(CAT, np, primary()))); 693 } 694 return (np); 695 } 696 697 Node *alt(Node *np) 698 { 699 if (rtok == OR) { 700 rtok = relex(); 701 return (alt(op2(OR, np, concat(primary())))); 702 } 703 return (np); 704 } 705 706 Node *unary(Node *np) 707 { 708 switch (rtok) { 709 case STAR: 710 rtok = relex(); 711 return (unary(op2(STAR, np, NIL))); 712 case PLUS: 713 rtok = relex(); 714 return (unary(op2(PLUS, np, NIL))); 715 case QUEST: 716 rtok = relex(); 717 return (unary(op2(QUEST, np, NIL))); 718 default: 719 return (np); 720 } 721 } 722 723 /* 724 * Character class definitions conformant to the POSIX locale as 725 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source 726 * and operating character sets are both ASCII (ISO646) or supersets 727 * thereof. 728 * 729 * Note that to avoid overflowing the temporary buffer used in 730 * relex(), the expanded character class (prior to range expansion) 731 * must be less than twice the size of their full name. 732 */ 733 734 /* Because isblank doesn't show up in any of the header files on any 735 * system i use, it's defined here. if some other locale has a richer 736 * definition of "blank", define HAS_ISBLANK and provide your own 737 * version. 738 * the parentheses here are an attempt to find a path through the maze 739 * of macro definition and/or function and/or version provided. thanks 740 * to nelson beebe for the suggestion; let's see if it works everywhere. 741 */ 742 743 #ifndef HAS_ISBLANK 744 745 int (xisblank)(int c) 746 { 747 return c==' ' || c=='\t'; 748 } 749 750 #endif 751 752 struct charclass { 753 const char *cc_name; 754 int cc_namelen; 755 int (*cc_func)(int); 756 } charclasses[] = { 757 { "alnum", 5, isalnum }, 758 { "alpha", 5, isalpha }, 759 #ifndef HAS_ISBLANK 760 { "blank", 5, isspace }, /* was isblank */ 761 #else 762 { "blank", 5, isblank }, 763 #endif 764 { "cntrl", 5, iscntrl }, 765 { "digit", 5, isdigit }, 766 { "graph", 5, isgraph }, 767 { "lower", 5, islower }, 768 { "print", 5, isprint }, 769 { "punct", 5, ispunct }, 770 { "space", 5, isspace }, 771 { "upper", 5, isupper }, 772 { "xdigit", 6, isxdigit }, 773 { NULL, 0, NULL }, 774 }; 775 776 777 int relex(void) /* lexical analyzer for reparse */ 778 { 779 int c, n; 780 int cflag; 781 static uschar *buf = 0; 782 static int bufsz = 100; 783 uschar *bp; 784 struct charclass *cc; 785 int i; 786 787 switch (c = *prestr++) { 788 case '|': return OR; 789 case '*': return STAR; 790 case '+': return PLUS; 791 case '?': return QUEST; 792 case '.': return DOT; 793 case '\0': prestr--; return '\0'; 794 case '^': 795 case '$': 796 case '(': 797 case ')': 798 return c; 799 case '\\': 800 rlxval = quoted(&prestr); 801 return CHAR; 802 default: 803 rlxval = c; 804 return CHAR; 805 case '[': 806 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 807 FATAL("out of space in reg expr %.10s..", lastre); 808 bp = buf; 809 if (*prestr == '^') { 810 cflag = 1; 811 prestr++; 812 } 813 else 814 cflag = 0; 815 n = 2 * strlen((const char *) prestr)+1; 816 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1")) 817 FATAL("out of space for reg expr %.10s...", lastre); 818 for (; ; ) { 819 if ((c = *prestr++) == '\\') { 820 *bp++ = '\\'; 821 if ((c = *prestr++) == '\0') 822 FATAL("nonterminated character class %.20s...", lastre); 823 *bp++ = c; 824 /* } else if (c == '\n') { */ 825 /* FATAL("newline in character class %.20s...", lastre); */ 826 } else if (c == '[' && *prestr == ':') { 827 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */ 828 for (cc = charclasses; cc->cc_name; cc++) 829 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) 830 break; 831 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && 832 prestr[2 + cc->cc_namelen] == ']') { 833 prestr += cc->cc_namelen + 3; 834 for (i = 0; i < NCHARS; i++) { 835 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2")) 836 FATAL("out of space for reg expr %.10s...", lastre); 837 if (cc->cc_func(i)) { 838 *bp++ = i; 839 n++; 840 } 841 } 842 } else 843 *bp++ = c; 844 } else if (c == '\0') { 845 FATAL("nonterminated character class %.20s", lastre); 846 } else if (bp == buf) { /* 1st char is special */ 847 *bp++ = c; 848 } else if (c == ']') { 849 *bp++ = 0; 850 rlxstr = (uschar *) tostring((char *) buf); 851 if (cflag == 0) 852 return CCL; 853 else 854 return NCCL; 855 } else 856 *bp++ = c; 857 } 858 } 859 } 860 861 int cgoto(fa *f, int s, int c) 862 { 863 int i, j, k; 864 int *p, *q; 865 866 assert(c == HAT || c < NCHARS); 867 while (f->accept >= maxsetvec) { /* guessing here! */ 868 setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(int)); 869 tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int)); 870 if (setvec == 0 || tmpset == 0) 871 overflo("out of space in cgoto()"); 872 maxsetvec *= 4; 873 } 874 for (i = 0; i <= f->accept; i++) 875 setvec[i] = 0; 876 setcnt = 0; 877 /* compute positions of gototab[s,c] into setvec */ 878 p = f->posns[s]; 879 for (i = 1; i <= *p; i++) { 880 if ((k = f->re[p[i]].ltype) != FINAL) { 881 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) 882 || (k == DOT && c != 0 && c != HAT) 883 || (k == ALL && c != 0) 884 || (k == EMPTYRE && c != 0) 885 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) 886 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { 887 q = f->re[p[i]].lfollow; 888 for (j = 1; j <= *q; j++) { 889 if (q[j] >= maxsetvec) { 890 setvec = reallocarray(setvec, 891 maxsetvec, 4 * sizeof(int)); 892 tmpset = reallocarray(tmpset, 893 maxsetvec, 4 * sizeof(int)); 894 if (setvec == 0 || tmpset == 0) 895 overflo("cgoto overflow"); 896 maxsetvec *= 4; 897 } 898 if (setvec[q[j]] == 0) { 899 setcnt++; 900 setvec[q[j]] = 1; 901 } 902 } 903 } 904 } 905 } 906 /* determine if setvec is a previous state */ 907 tmpset[0] = setcnt; 908 j = 1; 909 for (i = f->accept; i >= 0; i--) 910 if (setvec[i]) { 911 tmpset[j++] = i; 912 } 913 /* tmpset == previous state? */ 914 for (i = 1; i <= f->curstat; i++) { 915 p = f->posns[i]; 916 if ((k = tmpset[0]) != p[0]) 917 goto different; 918 for (j = 1; j <= k; j++) 919 if (tmpset[j] != p[j]) 920 goto different; 921 /* setvec is state i */ 922 f->gototab[s][c] = i; 923 return i; 924 different:; 925 } 926 927 /* add tmpset to current set of states */ 928 if (f->curstat >= NSTATES-1) { 929 f->curstat = 2; 930 f->reset = 1; 931 for (i = 2; i < NSTATES; i++) 932 xfree(f->posns[i]); 933 } else 934 ++(f->curstat); 935 for (i = 0; i < NCHARS; i++) 936 f->gototab[f->curstat][i] = 0; 937 xfree(f->posns[f->curstat]); 938 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) 939 overflo("out of space in cgoto"); 940 941 f->posns[f->curstat] = p; 942 f->gototab[s][c] = f->curstat; 943 for (i = 0; i <= setcnt; i++) 944 p[i] = tmpset[i]; 945 if (setvec[f->accept]) 946 f->out[f->curstat] = 1; 947 else 948 f->out[f->curstat] = 0; 949 return f->curstat; 950 } 951 952 953 void freefa(fa *f) /* free a finite automaton */ 954 { 955 int i; 956 957 if (f == NULL) 958 return; 959 for (i = 0; i <= f->curstat; i++) 960 xfree(f->posns[i]); 961 for (i = 0; i <= f->accept; i++) { 962 xfree(f->re[i].lfollow); 963 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) 964 xfree((f->re[i].lval.np)); 965 } 966 xfree(f->restr); 967 xfree(f); 968 } 969