1 /* $OpenBSD: b.c,v 1.18 2014/12/19 19:28:55 deraadt Exp $ */ 2 /**************************************************************** 3 Copyright (C) Lucent Technologies 1997 4 All Rights Reserved 5 6 Permission to use, copy, modify, and distribute this software and 7 its documentation for any purpose and without fee is hereby 8 granted, provided that the above copyright notice appear in all 9 copies and that both that the copyright notice and this 10 permission notice and warranty disclaimer appear in supporting 11 documentation, and that the name Lucent Technologies or any of 12 its entities not be used in advertising or publicity pertaining 13 to distribution of the software without specific, written prior 14 permission. 15 16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 23 THIS SOFTWARE. 24 ****************************************************************/ 25 26 /* lasciate ogne speranza, voi ch'intrate. */ 27 28 #define DEBUG 29 30 #include <ctype.h> 31 #include <stdio.h> 32 #include <string.h> 33 #include <stdlib.h> 34 #include "awk.h" 35 #include "ytab.h" 36 37 #define HAT (NCHARS+2) /* matches ^ in regular expr */ 38 /* NCHARS is 2**n */ 39 #define MAXLIN 22 40 41 #define type(v) (v)->nobj /* badly overloaded here */ 42 #define info(v) (v)->ntype /* badly overloaded here */ 43 #define left(v) (v)->narg[0] 44 #define right(v) (v)->narg[1] 45 #define parent(v) (v)->nnext 46 47 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: 48 #define ELEAF case EMPTYRE: /* empty string in regexp */ 49 #define UNARY case STAR: case PLUS: case QUEST: 50 51 /* encoding in tree Nodes: 52 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE): 53 left is index, right contains value or pointer to value 54 unary (STAR, PLUS, QUEST): left is child, right is null 55 binary (CAT, OR): left and right are children 56 parent contains pointer to parent 57 */ 58 59 60 int *setvec; 61 int *tmpset; 62 int maxsetvec = 0; 63 64 int rtok; /* next token in current re */ 65 int rlxval; 66 static uschar *rlxstr; 67 static uschar *prestr; /* current position in current re */ 68 static uschar *lastre; /* origin of last re */ 69 70 static int setcnt; 71 static int poscnt; 72 73 char *patbeg; 74 int patlen; 75 76 #define NFA 20 /* cache this many dynamic fa's */ 77 fa *fatab[NFA]; 78 int nfatab = 0; /* entries in fatab */ 79 80 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ 81 { 82 int i, use, nuse; 83 fa *pfa; 84 static int now = 1; 85 86 if (setvec == 0) { /* first time through any RE */ 87 maxsetvec = MAXLIN; 88 setvec = (int *) calloc(maxsetvec, sizeof(int)); 89 tmpset = (int *) calloc(maxsetvec, sizeof(int)); 90 if (setvec == 0 || tmpset == 0) 91 overflo("out of space initializing makedfa"); 92 } 93 94 if (compile_time) /* a constant for sure */ 95 return mkdfa(s, anchor); 96 for (i = 0; i < nfatab; i++) /* is it there already? */ 97 if (fatab[i]->anchor == anchor 98 && strcmp((const char *) fatab[i]->restr, s) == 0) { 99 fatab[i]->use = now++; 100 return fatab[i]; 101 } 102 pfa = mkdfa(s, anchor); 103 if (nfatab < NFA) { /* room for another */ 104 fatab[nfatab] = pfa; 105 fatab[nfatab]->use = now++; 106 nfatab++; 107 return pfa; 108 } 109 use = fatab[0]->use; /* replace least-recently used */ 110 nuse = 0; 111 for (i = 1; i < nfatab; i++) 112 if (fatab[i]->use < use) { 113 use = fatab[i]->use; 114 nuse = i; 115 } 116 freefa(fatab[nuse]); 117 fatab[nuse] = pfa; 118 pfa->use = now++; 119 return pfa; 120 } 121 122 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ 123 /* anchor = 1 for anchored matches, else 0 */ 124 { 125 Node *p, *p1; 126 fa *f; 127 128 p = reparse(s); 129 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); 130 /* put ALL STAR in front of reg. exp. */ 131 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); 132 /* put FINAL after reg. exp. */ 133 134 poscnt = 0; 135 penter(p1); /* enter parent pointers and leaf indices */ 136 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) 137 overflo("out of space for fa"); 138 f->accept = poscnt-1; /* penter has computed number of positions in re */ 139 cfoll(f, p1); /* set up follow sets */ 140 freetr(p1); 141 if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL) 142 overflo("out of space in makedfa"); 143 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) 144 overflo("out of space in makedfa"); 145 *f->posns[1] = 0; 146 f->initstat = makeinit(f, anchor); 147 f->anchor = anchor; 148 f->restr = (uschar *) tostring(s); 149 return f; 150 } 151 152 int makeinit(fa *f, int anchor) 153 { 154 int i, k; 155 156 f->curstat = 2; 157 f->out[2] = 0; 158 f->reset = 0; 159 k = *(f->re[0].lfollow); 160 xfree(f->posns[2]); 161 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) 162 overflo("out of space in makeinit"); 163 for (i=0; i <= k; i++) { 164 (f->posns[2])[i] = (f->re[0].lfollow)[i]; 165 } 166 if ((f->posns[2])[1] == f->accept) 167 f->out[2] = 1; 168 for (i=0; i < NCHARS; i++) 169 f->gototab[2][i] = 0; 170 f->curstat = cgoto(f, 2, HAT); 171 if (anchor) { 172 *f->posns[2] = k-1; /* leave out position 0 */ 173 for (i=0; i < k; i++) { 174 (f->posns[0])[i] = (f->posns[2])[i]; 175 } 176 177 f->out[0] = f->out[2]; 178 if (f->curstat != 2) 179 --(*f->posns[f->curstat]); 180 } 181 return f->curstat; 182 } 183 184 void penter(Node *p) /* set up parent pointers and leaf indices */ 185 { 186 switch (type(p)) { 187 ELEAF 188 LEAF 189 info(p) = poscnt; 190 poscnt++; 191 break; 192 UNARY 193 penter(left(p)); 194 parent(left(p)) = p; 195 break; 196 case CAT: 197 case OR: 198 penter(left(p)); 199 penter(right(p)); 200 parent(left(p)) = p; 201 parent(right(p)) = p; 202 break; 203 default: /* can't happen */ 204 FATAL("can't happen: unknown type %d in penter", type(p)); 205 break; 206 } 207 } 208 209 void freetr(Node *p) /* free parse tree */ 210 { 211 switch (type(p)) { 212 ELEAF 213 LEAF 214 xfree(p); 215 break; 216 UNARY 217 freetr(left(p)); 218 xfree(p); 219 break; 220 case CAT: 221 case OR: 222 freetr(left(p)); 223 freetr(right(p)); 224 xfree(p); 225 break; 226 default: /* can't happen */ 227 FATAL("can't happen: unknown type %d in freetr", type(p)); 228 break; 229 } 230 } 231 232 /* in the parsing of regular expressions, metacharacters like . have */ 233 /* to be seen literally; \056 is not a metacharacter. */ 234 235 int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */ 236 { /* only pick up one 8-bit byte (2 chars) */ 237 uschar *p; 238 int n = 0; 239 int i; 240 241 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { 242 if (isdigit(*p)) 243 n = 16 * n + *p - '0'; 244 else if (*p >= 'a' && *p <= 'f') 245 n = 16 * n + *p - 'a' + 10; 246 else if (*p >= 'A' && *p <= 'F') 247 n = 16 * n + *p - 'A' + 10; 248 } 249 *pp = (uschar *) p; 250 return n; 251 } 252 253 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ 254 255 int quoted(uschar **pp) /* pick up next thing after a \\ */ 256 /* and increment *pp */ 257 { 258 uschar *p = *pp; 259 int c; 260 261 if ((c = *p++) == 't') 262 c = '\t'; 263 else if (c == 'n') 264 c = '\n'; 265 else if (c == 'f') 266 c = '\f'; 267 else if (c == 'r') 268 c = '\r'; 269 else if (c == 'b') 270 c = '\b'; 271 else if (c == '\\') 272 c = '\\'; 273 else if (c == 'x') { /* hexadecimal goo follows */ 274 c = hexstr(&p); /* this adds a null if number is invalid */ 275 } else if (isoctdigit(c)) { /* \d \dd \ddd */ 276 int n = c - '0'; 277 if (isoctdigit(*p)) { 278 n = 8 * n + *p++ - '0'; 279 if (isoctdigit(*p)) 280 n = 8 * n + *p++ - '0'; 281 } 282 c = n; 283 } /* else */ 284 /* c = c; */ 285 *pp = p; 286 return c; 287 } 288 289 char *cclenter(const char *argp) /* add a character class */ 290 { 291 int i, c, c2; 292 uschar *p = (uschar *) argp; 293 uschar *op, *bp; 294 static uschar *buf = 0; 295 static int bufsz = 100; 296 297 op = p; 298 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 299 FATAL("out of space for character class [%.10s...] 1", p); 300 bp = buf; 301 for (i = 0; (c = *p++) != 0; ) { 302 if (c == '\\') { 303 c = quoted(&p); 304 } else if (c == '-' && i > 0 && bp[-1] != 0) { 305 if (*p != 0) { 306 c = bp[-1]; 307 c2 = *p++; 308 if (c2 == '\\') 309 c2 = quoted(&p); 310 if (c > c2) { /* empty; ignore */ 311 bp--; 312 i--; 313 continue; 314 } 315 while (c < c2) { 316 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1")) 317 FATAL("out of space for character class [%.10s...] 2", p); 318 *bp++ = ++c; 319 i++; 320 } 321 continue; 322 } 323 } 324 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2")) 325 FATAL("out of space for character class [%.10s...] 3", p); 326 *bp++ = c; 327 i++; 328 } 329 *bp = 0; 330 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); 331 xfree(op); 332 return (char *) tostring((char *) buf); 333 } 334 335 void overflo(const char *s) 336 { 337 FATAL("regular expression too big: %.30s...", s); 338 } 339 340 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ 341 { 342 int i; 343 int *p; 344 345 switch (type(v)) { 346 ELEAF 347 LEAF 348 f->re[info(v)].ltype = type(v); 349 f->re[info(v)].lval.np = right(v); 350 while (f->accept >= maxsetvec) { /* guessing here! */ 351 setvec = reallocarray(setvec, maxsetvec, 352 4 * sizeof(int)); 353 tmpset = reallocarray(tmpset, maxsetvec, 354 4 * sizeof(int)); 355 if (setvec == 0 || tmpset == 0) 356 overflo("out of space in cfoll()"); 357 maxsetvec *= 4; 358 } 359 for (i = 0; i <= f->accept; i++) 360 setvec[i] = 0; 361 setcnt = 0; 362 follow(v); /* computes setvec and setcnt */ 363 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) 364 overflo("out of space building follow set"); 365 f->re[info(v)].lfollow = p; 366 *p = setcnt; 367 for (i = f->accept; i >= 0; i--) 368 if (setvec[i] == 1) 369 *++p = i; 370 break; 371 UNARY 372 cfoll(f,left(v)); 373 break; 374 case CAT: 375 case OR: 376 cfoll(f,left(v)); 377 cfoll(f,right(v)); 378 break; 379 default: /* can't happen */ 380 FATAL("can't happen: unknown type %d in cfoll", type(v)); 381 } 382 } 383 384 int first(Node *p) /* collects initially active leaves of p into setvec */ 385 /* returns 0 if p matches empty string */ 386 { 387 int b, lp; 388 389 switch (type(p)) { 390 ELEAF 391 LEAF 392 lp = info(p); /* look for high-water mark of subscripts */ 393 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ 394 setvec = reallocarray(setvec, maxsetvec, 395 4 * sizeof(int)); 396 tmpset = reallocarray(tmpset, maxsetvec, 397 4 * sizeof(int)); 398 if (setvec == 0 || tmpset == 0) 399 overflo("out of space in first()"); 400 maxsetvec *= 4; 401 } 402 if (type(p) == EMPTYRE) { 403 setvec[lp] = 0; 404 return(0); 405 } 406 if (setvec[lp] != 1) { 407 setvec[lp] = 1; 408 setcnt++; 409 } 410 if (type(p) == CCL && (*(char *) right(p)) == '\0') 411 return(0); /* empty CCL */ 412 else return(1); 413 case PLUS: 414 if (first(left(p)) == 0) return(0); 415 return(1); 416 case STAR: 417 case QUEST: 418 first(left(p)); 419 return(0); 420 case CAT: 421 if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 422 return(1); 423 case OR: 424 b = first(right(p)); 425 if (first(left(p)) == 0 || b == 0) return(0); 426 return(1); 427 } 428 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ 429 return(-1); 430 } 431 432 void follow(Node *v) /* collects leaves that can follow v into setvec */ 433 { 434 Node *p; 435 436 if (type(v) == FINAL) 437 return; 438 p = parent(v); 439 switch (type(p)) { 440 case STAR: 441 case PLUS: 442 first(v); 443 follow(p); 444 return; 445 446 case OR: 447 case QUEST: 448 follow(p); 449 return; 450 451 case CAT: 452 if (v == left(p)) { /* v is left child of p */ 453 if (first(right(p)) == 0) { 454 follow(p); 455 return; 456 } 457 } else /* v is right child */ 458 follow(p); 459 return; 460 } 461 } 462 463 int member(int c, const char *sarg) /* is c in s? */ 464 { 465 uschar *s = (uschar *) sarg; 466 467 while (*s) 468 if (c == *s++) 469 return(1); 470 return(0); 471 } 472 473 int match(fa *f, const char *p0) /* shortest match ? */ 474 { 475 int s, ns; 476 uschar *p = (uschar *) p0; 477 478 s = f->reset ? makeinit(f,0) : f->initstat; 479 if (f->out[s]) 480 return(1); 481 do { 482 /* assert(*p < NCHARS); */ 483 if ((ns = f->gototab[s][*p]) != 0) 484 s = ns; 485 else 486 s = cgoto(f, s, *p); 487 if (f->out[s]) 488 return(1); 489 } while (*p++ != 0); 490 return(0); 491 } 492 493 int pmatch(fa *f, const char *p0) /* longest match, for sub */ 494 { 495 int s, ns; 496 uschar *p = (uschar *) p0; 497 uschar *q; 498 int i, k; 499 500 /* s = f->reset ? makeinit(f,1) : f->initstat; */ 501 if (f->reset) { 502 f->initstat = s = makeinit(f,1); 503 } else { 504 s = f->initstat; 505 } 506 patbeg = (char *) p; 507 patlen = -1; 508 do { 509 q = p; 510 do { 511 if (f->out[s]) /* final state */ 512 patlen = q-p; 513 /* assert(*q < NCHARS); */ 514 if ((ns = f->gototab[s][*q]) != 0) 515 s = ns; 516 else 517 s = cgoto(f, s, *q); 518 if (s == 1) { /* no transition */ 519 if (patlen >= 0) { 520 patbeg = (char *) p; 521 return(1); 522 } 523 else 524 goto nextin; /* no match */ 525 } 526 } while (*q++ != 0); 527 if (f->out[s]) 528 patlen = q-p-1; /* don't count $ */ 529 if (patlen >= 0) { 530 patbeg = (char *) p; 531 return(1); 532 } 533 nextin: 534 s = 2; 535 if (f->reset) { 536 for (i = 2; i <= f->curstat; i++) 537 xfree(f->posns[i]); 538 k = *f->posns[0]; 539 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) 540 overflo("out of space in pmatch"); 541 for (i = 0; i <= k; i++) 542 (f->posns[2])[i] = (f->posns[0])[i]; 543 f->initstat = f->curstat = 2; 544 f->out[2] = f->out[0]; 545 for (i = 0; i < NCHARS; i++) 546 f->gototab[2][i] = 0; 547 } 548 } while (*p++ != 0); 549 return (0); 550 } 551 552 int nematch(fa *f, const char *p0) /* non-empty match, for sub */ 553 { 554 int s, ns; 555 uschar *p = (uschar *) p0; 556 uschar *q; 557 int i, k; 558 559 /* s = f->reset ? makeinit(f,1) : f->initstat; */ 560 if (f->reset) { 561 f->initstat = s = makeinit(f,1); 562 } else { 563 s = f->initstat; 564 } 565 patlen = -1; 566 while (*p) { 567 q = p; 568 do { 569 if (f->out[s]) /* final state */ 570 patlen = q-p; 571 /* assert(*q < NCHARS); */ 572 if ((ns = f->gototab[s][*q]) != 0) 573 s = ns; 574 else 575 s = cgoto(f, s, *q); 576 if (s == 1) { /* no transition */ 577 if (patlen > 0) { 578 patbeg = (char *) p; 579 return(1); 580 } else 581 goto nnextin; /* no nonempty match */ 582 } 583 } while (*q++ != 0); 584 if (f->out[s]) 585 patlen = q-p-1; /* don't count $ */ 586 if (patlen > 0 ) { 587 patbeg = (char *) p; 588 return(1); 589 } 590 nnextin: 591 s = 2; 592 if (f->reset) { 593 for (i = 2; i <= f->curstat; i++) 594 xfree(f->posns[i]); 595 k = *f->posns[0]; 596 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) 597 overflo("out of state space"); 598 for (i = 0; i <= k; i++) 599 (f->posns[2])[i] = (f->posns[0])[i]; 600 f->initstat = f->curstat = 2; 601 f->out[2] = f->out[0]; 602 for (i = 0; i < NCHARS; i++) 603 f->gototab[2][i] = 0; 604 } 605 p++; 606 } 607 return (0); 608 } 609 610 Node *reparse(const char *p) /* parses regular expression pointed to by p */ 611 { /* uses relex() to scan regular expression */ 612 Node *np; 613 614 dprintf( ("reparse <%s>\n", p) ); 615 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ 616 rtok = relex(); 617 /* GNU compatibility: an empty regexp matches anything */ 618 if (rtok == '\0') { 619 /* FATAL("empty regular expression"); previous */ 620 return(op2(EMPTYRE, NIL, NIL)); 621 } 622 np = regexp(); 623 if (rtok != '\0') 624 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 625 return(np); 626 } 627 628 Node *regexp(void) /* top-level parse of reg expr */ 629 { 630 return (alt(concat(primary()))); 631 } 632 633 Node *primary(void) 634 { 635 Node *np; 636 637 switch (rtok) { 638 case CHAR: 639 np = op2(CHAR, NIL, itonp(rlxval)); 640 rtok = relex(); 641 return (unary(np)); 642 case ALL: 643 rtok = relex(); 644 return (unary(op2(ALL, NIL, NIL))); 645 case EMPTYRE: 646 rtok = relex(); 647 return (unary(op2(ALL, NIL, NIL))); 648 case DOT: 649 rtok = relex(); 650 return (unary(op2(DOT, NIL, NIL))); 651 case CCL: 652 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); 653 rtok = relex(); 654 return (unary(np)); 655 case NCCL: 656 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); 657 rtok = relex(); 658 return (unary(np)); 659 case '^': 660 rtok = relex(); 661 return (unary(op2(CHAR, NIL, itonp(HAT)))); 662 case '$': 663 rtok = relex(); 664 return (unary(op2(CHAR, NIL, NIL))); 665 case '(': 666 rtok = relex(); 667 if (rtok == ')') { /* special pleading for () */ 668 rtok = relex(); 669 return unary(op2(CCL, NIL, (Node *) tostring(""))); 670 } 671 np = regexp(); 672 if (rtok == ')') { 673 rtok = relex(); 674 return (unary(np)); 675 } 676 else 677 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 678 default: 679 FATAL("illegal primary in regular expression %s at %s", lastre, prestr); 680 } 681 return 0; /*NOTREACHED*/ 682 } 683 684 Node *concat(Node *np) 685 { 686 switch (rtok) { 687 case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(': 688 return (concat(op2(CAT, np, primary()))); 689 } 690 return (np); 691 } 692 693 Node *alt(Node *np) 694 { 695 if (rtok == OR) { 696 rtok = relex(); 697 return (alt(op2(OR, np, concat(primary())))); 698 } 699 return (np); 700 } 701 702 Node *unary(Node *np) 703 { 704 switch (rtok) { 705 case STAR: 706 rtok = relex(); 707 return (unary(op2(STAR, np, NIL))); 708 case PLUS: 709 rtok = relex(); 710 return (unary(op2(PLUS, np, NIL))); 711 case QUEST: 712 rtok = relex(); 713 return (unary(op2(QUEST, np, NIL))); 714 default: 715 return (np); 716 } 717 } 718 719 /* 720 * Character class definitions conformant to the POSIX locale as 721 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source 722 * and operating character sets are both ASCII (ISO646) or supersets 723 * thereof. 724 * 725 * Note that to avoid overflowing the temporary buffer used in 726 * relex(), the expanded character class (prior to range expansion) 727 * must be less than twice the size of their full name. 728 */ 729 730 /* Because isblank doesn't show up in any of the header files on any 731 * system i use, it's defined here. if some other locale has a richer 732 * definition of "blank", define HAS_ISBLANK and provide your own 733 * version. 734 * the parentheses here are an attempt to find a path through the maze 735 * of macro definition and/or function and/or version provided. thanks 736 * to nelson beebe for the suggestion; let's see if it works everywhere. 737 */ 738 739 #ifndef HAS_ISBLANK 740 741 int (xisblank)(int c) 742 { 743 return c==' ' || c=='\t'; 744 } 745 746 #endif 747 748 struct charclass { 749 const char *cc_name; 750 int cc_namelen; 751 int (*cc_func)(int); 752 } charclasses[] = { 753 { "alnum", 5, isalnum }, 754 { "alpha", 5, isalpha }, 755 #ifndef HAS_ISBLANK 756 { "blank", 5, isspace }, /* was isblank */ 757 #else 758 { "blank", 5, isblank }, 759 #endif 760 { "cntrl", 5, iscntrl }, 761 { "digit", 5, isdigit }, 762 { "graph", 5, isgraph }, 763 { "lower", 5, islower }, 764 { "print", 5, isprint }, 765 { "punct", 5, ispunct }, 766 { "space", 5, isspace }, 767 { "upper", 5, isupper }, 768 { "xdigit", 6, isxdigit }, 769 { NULL, 0, NULL }, 770 }; 771 772 773 int relex(void) /* lexical analyzer for reparse */ 774 { 775 int c, n; 776 int cflag; 777 static uschar *buf = 0; 778 static int bufsz = 100; 779 uschar *bp; 780 struct charclass *cc; 781 int i; 782 783 switch (c = *prestr++) { 784 case '|': return OR; 785 case '*': return STAR; 786 case '+': return PLUS; 787 case '?': return QUEST; 788 case '.': return DOT; 789 case '\0': prestr--; return '\0'; 790 case '^': 791 case '$': 792 case '(': 793 case ')': 794 return c; 795 case '\\': 796 rlxval = quoted(&prestr); 797 return CHAR; 798 default: 799 rlxval = c; 800 return CHAR; 801 case '[': 802 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 803 FATAL("out of space in reg expr %.10s..", lastre); 804 bp = buf; 805 if (*prestr == '^') { 806 cflag = 1; 807 prestr++; 808 } 809 else 810 cflag = 0; 811 n = 2 * strlen((const char *) prestr)+1; 812 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1")) 813 FATAL("out of space for reg expr %.10s...", lastre); 814 for (; ; ) { 815 if ((c = *prestr++) == '\\') { 816 *bp++ = '\\'; 817 if ((c = *prestr++) == '\0') 818 FATAL("nonterminated character class %.20s...", lastre); 819 *bp++ = c; 820 /* } else if (c == '\n') { */ 821 /* FATAL("newline in character class %.20s...", lastre); */ 822 } else if (c == '[' && *prestr == ':') { 823 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */ 824 for (cc = charclasses; cc->cc_name; cc++) 825 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) 826 break; 827 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && 828 prestr[2 + cc->cc_namelen] == ']') { 829 prestr += cc->cc_namelen + 3; 830 for (i = 0; i < NCHARS; i++) { 831 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2")) 832 FATAL("out of space for reg expr %.10s...", lastre); 833 if (cc->cc_func(i)) { 834 *bp++ = i; 835 n++; 836 } 837 } 838 } else 839 *bp++ = c; 840 } else if (c == '\0') { 841 FATAL("nonterminated character class %.20s", lastre); 842 } else if (bp == buf) { /* 1st char is special */ 843 *bp++ = c; 844 } else if (c == ']') { 845 *bp++ = 0; 846 rlxstr = (uschar *) tostring((char *) buf); 847 if (cflag == 0) 848 return CCL; 849 else 850 return NCCL; 851 } else 852 *bp++ = c; 853 } 854 } 855 } 856 857 int cgoto(fa *f, int s, int c) 858 { 859 int i, j, k; 860 int *p, *q; 861 862 assert(c == HAT || c < NCHARS); 863 while (f->accept >= maxsetvec) { /* guessing here! */ 864 setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(int)); 865 tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int)); 866 if (setvec == 0 || tmpset == 0) 867 overflo("out of space in cgoto()"); 868 maxsetvec *= 4; 869 } 870 for (i = 0; i <= f->accept; i++) 871 setvec[i] = 0; 872 setcnt = 0; 873 /* compute positions of gototab[s,c] into setvec */ 874 p = f->posns[s]; 875 for (i = 1; i <= *p; i++) { 876 if ((k = f->re[p[i]].ltype) != FINAL) { 877 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) 878 || (k == DOT && c != 0 && c != HAT) 879 || (k == ALL && c != 0) 880 || (k == EMPTYRE && c != 0) 881 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) 882 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { 883 q = f->re[p[i]].lfollow; 884 for (j = 1; j <= *q; j++) { 885 if (q[j] >= maxsetvec) { 886 setvec = reallocarray(setvec, 887 maxsetvec, 4 * sizeof(int)); 888 tmpset = reallocarray(tmpset, 889 maxsetvec, 4 * sizeof(int)); 890 if (setvec == 0 || tmpset == 0) 891 overflo("cgoto overflow"); 892 maxsetvec *= 4; 893 } 894 if (setvec[q[j]] == 0) { 895 setcnt++; 896 setvec[q[j]] = 1; 897 } 898 } 899 } 900 } 901 } 902 /* determine if setvec is a previous state */ 903 tmpset[0] = setcnt; 904 j = 1; 905 for (i = f->accept; i >= 0; i--) 906 if (setvec[i]) { 907 tmpset[j++] = i; 908 } 909 /* tmpset == previous state? */ 910 for (i = 1; i <= f->curstat; i++) { 911 p = f->posns[i]; 912 if ((k = tmpset[0]) != p[0]) 913 goto different; 914 for (j = 1; j <= k; j++) 915 if (tmpset[j] != p[j]) 916 goto different; 917 /* setvec is state i */ 918 f->gototab[s][c] = i; 919 return i; 920 different:; 921 } 922 923 /* add tmpset to current set of states */ 924 if (f->curstat >= NSTATES-1) { 925 f->curstat = 2; 926 f->reset = 1; 927 for (i = 2; i < NSTATES; i++) 928 xfree(f->posns[i]); 929 } else 930 ++(f->curstat); 931 for (i = 0; i < NCHARS; i++) 932 f->gototab[f->curstat][i] = 0; 933 xfree(f->posns[f->curstat]); 934 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) 935 overflo("out of space in cgoto"); 936 937 f->posns[f->curstat] = p; 938 f->gototab[s][c] = f->curstat; 939 for (i = 0; i <= setcnt; i++) 940 p[i] = tmpset[i]; 941 if (setvec[f->accept]) 942 f->out[f->curstat] = 1; 943 else 944 f->out[f->curstat] = 0; 945 return f->curstat; 946 } 947 948 949 void freefa(fa *f) /* free a finite automaton */ 950 { 951 int i; 952 953 if (f == NULL) 954 return; 955 for (i = 0; i <= f->curstat; i++) 956 xfree(f->posns[i]); 957 for (i = 0; i <= f->accept; i++) { 958 xfree(f->re[i].lfollow); 959 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) 960 xfree((f->re[i].lval.np)); 961 } 962 xfree(f->restr); 963 xfree(f); 964 } 965