1 /* 2 * 3 * Portions Copyright %G% Sun Microsystems, Inc. All Rights Reserved 4 * 5 */ 6 7 #pragma ident "%Z%%M% %I% %E% SMI" 8 9 #include "portable.h" 10 #include "stdio.h" 11 #include "stdlib.h" 12 13 #if defined( MACOS ) || defined( DOS ) || defined( _WIN32 ) || defined( NEED_BSDREGEX ) 14 #include "regex.h" 15 16 /* 17 * regex - Regular expression pattern matching and replacement 18 * 19 * By: Ozan S. Yigit (oz) 20 * Dept. of Computer Science 21 * York University 22 * 23 * These routines are the PUBLIC DOMAIN equivalents of regex 24 * routines as found in 4.nBSD UN*X, with minor extensions. 25 * 26 * These routines are derived from various implementations found 27 * in software tools books, and Conroy's grep. They are NOT derived 28 * from licensed/restricted software. 29 * For more interesting/academic/complicated implementations, 30 * see Henry Spencer's regexp routines, or GNU Emacs pattern 31 * matching module. 32 * 33 * Modification history: 34 * 35 * $Log: regex.c,v $ 36 * Revision 1.12 1996/04/25 16:20:59 mcs 37 * make re_exec() match "" with ".*" and similar patterns 38 * hopefully this change doesn't break anything else! 39 * 40 * Revision 1.11 1994/12/14 21:33:45 mcs 41 * use new NEED_BSDREGEX 42 * fix pmatch() prototype 43 * 44 * Revision 1.10 1994/12/12 18:16:39 mcs 45 * use on NetBSD 46 * 47 * Revision 1.9 1994/11/15 19:16:35 mcs 48 * add (CHAR) cast to make VisualC++ happy 49 * 50 * Revision 1.8 1994/11/08 21:14:32 mcs 51 * WIN32 changes 52 * 53 * Revision 1.7 1994/07/23 19:51:24 mcs 54 * use ANSI-style inline function parameters 55 * 56 * Revision 1.6 1993/10/18 01:52:32 tim 57 * include for VMS 58 * 59 * Revision 1.5 1993/09/28 21:37:54 mcs 60 * HP/UX needs the regex we include (not in its libc) 61 * 62 * Revision 1.4 1993/08/27 15:59:52 mcs 63 * use CHAR for deftab 64 * 65 * Revision 1.3 1993/08/27 15:49:47 mcs 66 * added missing 0 to octal constants 67 * use unsigned char for CHAR under DOS 68 * 69 * Revision 1.2 1993/08/27 14:57:48 mcs 70 * add proto. for pmatch 71 * 72 * Revision 1.1 1993/08/18 21:20:02 mcs 73 * Initial revision 74 * 75 * Revision 1.4 1991/10/17 03:56:42 oz 76 * miscellaneous changes, small cleanups etc. 77 * 78 * Revision 1.3 1989/04/01 14:18:09 oz 79 * Change all references to a dfa: this is actually an nfa. 80 * 81 * Revision 1.2 88/08/28 15:36:04 oz 82 * Use a complement bitmap to represent NCL. 83 * This removes the need to have seperate 84 * code in the pmatch case block - it is 85 * just CCL code now. 86 * 87 * Use the actual CCL code in the CLO 88 * section of pmatch. No need for a recursive 89 * pmatch call. 90 * 91 * Use a bitmap table to set char bits in an 92 * 8-bit chunk. 93 * 94 * Interfaces: 95 * re_comp: compile a regular expression into a NFA. 96 * 97 * char *re_comp(s) 98 * char *s; 99 * 100 * re_exec: execute the NFA to match a pattern. 101 * 102 * int re_exec(s) 103 * char *s; 104 * 105 * re_modw change re_exec's understanding of what a "word" 106 * looks like (for \< and \>) by adding into the 107 * hidden word-syntax table. 108 * 109 * void re_modw(s) 110 * char *s; 111 * 112 * re_subs: substitute the matched portions in a new string. 113 * 114 * int re_subs(src, dst) 115 * char *src; 116 * char *dst; 117 * 118 * re_fail: failure routine for re_exec. 119 * 120 * void re_fail(msg, op) 121 * char *msg; 122 * char op; 123 * 124 * Regular Expressions: 125 * 126 * [1] char matches itself, unless it is a special 127 * character (metachar): . \ [ ] * + ^ $ 128 * 129 * [2] . matches any character. 130 * 131 * [3] \ matches the character following it, except 132 * when followed by a left or right round bracket, 133 * a digit 1 to 9 or a left or right angle bracket. 134 * (see [7], [8] and [9]) 135 * It is used as an escape character for all 136 * other meta-characters, and itself. When used 137 * in a set ([4]), it is treated as an ordinary 138 * character. 139 * 140 * [4] [set] matches one of the characters in the set. 141 * If the first character in the set is "^", 142 * it matches a character NOT in the set, i.e. 143 * complements the set. A shorthand S-E is 144 * used to specify a set of characters S upto 145 * E, inclusive. The special characters "]" and 146 * "-" have no special meaning if they appear 147 * as the first chars in the set. 148 * examples: match: 149 * 150 * [a-z] any lowercase alpha 151 * 152 * [^]-] any char except ] and - 153 * 154 * [^A-Z] any char except uppercase 155 * alpha 156 * 157 * [a-zA-Z] any alpha 158 * 159 * [5] * any regular expression form [1] to [4], followed by 160 * closure char (*) matches zero or more matches of 161 * that form. 162 * 163 * [6] + same as [5], except it matches one or more. 164 * 165 * [7] a regular expression in the form [1] to [10], enclosed 166 * as \(form\) matches what form matches. The enclosure 167 * creates a set of tags, used for [8] and for 168 * pattern substution. The tagged forms are numbered 169 * starting from 1. 170 * 171 * [8] a \ followed by a digit 1 to 9 matches whatever a 172 * previously tagged regular expression ([7]) matched. 173 * 174 * [9] \< a regular expression starting with a \< construct 175 * \> and/or ending with a \> construct, restricts the 176 * pattern matching to the beginning of a word, and/or 177 * the end of a word. A word is defined to be a character 178 * string beginning and/or ending with the characters 179 * A-Z a-z 0-9 and _. It must also be preceded and/or 180 * followed by any character outside those mentioned. 181 * 182 * [10] a composite regular expression xy where x and y 183 * are in the form [1] to [10] matches the longest 184 * match of x followed by a match for y. 185 * 186 * [11] ^ a regular expression starting with a ^ character 187 * $ and/or ending with a $ character, restricts the 188 * pattern matching to the beginning of the line, 189 * or the end of line. [anchors] Elsewhere in the 190 * pattern, ^ and $ are treated as ordinary characters. 191 * 192 * 193 * Acknowledgements: 194 * 195 * HCR's Hugh Redelmeier has been most helpful in various 196 * stages of development. He convinced me to include BOW 197 * and EOW constructs, originally invented by Rob Pike at 198 * the University of Toronto. 199 * 200 * References: 201 * Software tools Kernighan & Plauger 202 * Software tools in Pascal Kernighan & Plauger 203 * Grep [rsx-11 C dist] David Conroy 204 * ed - text editor Un*x Programmer's Manual 205 * Advanced editing on Un*x B. W. Kernighan 206 * RegExp routines Henry Spencer 207 * 208 * Notes: 209 * 210 * This implementation uses a bit-set representation for character 211 * classes for speed and compactness. Each character is represented 212 * by one bit in a 128-bit block. Thus, CCL always takes a 213 * constant 16 bytes in the internal nfa, and re_exec does a single 214 * bit comparison to locate the character in the set. 215 * 216 * Examples: 217 * 218 * pattern: foo*.* 219 * compile: CHR f CHR o CLO CHR o END CLO ANY END END 220 * matches: fo foo fooo foobar fobar foxx ... 221 * 222 * pattern: fo[ob]a[rz] 223 * compile: CHR f CHR o CCL bitset CHR a CCL bitset END 224 * matches: fobar fooar fobaz fooaz 225 * 226 * pattern: foo\\+ 227 * compile: CHR f CHR o CHR o CHR \ CLO CHR \ END END 228 * matches: foo\ foo\\ foo\\\ ... 229 * 230 * pattern: \(foo\)[1-3]\1 (same as foo[1-3]foo) 231 * compile: BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END 232 * matches: foo1foo foo2foo foo3foo 233 * 234 * pattern: \(fo.*\)-\1 235 * compile: BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END 236 * matches: foo-foo fo-fo fob-fob foobar-foobar ... 237 */ 238 239 #define MAXNFA 1024 240 #define MAXTAG 10 241 242 #define OKP 1 243 #define NOP 0 244 245 #define CHR 1 246 #define ANY 2 247 #define CCL 3 248 #define BOL 4 249 #define EOL 5 250 #define BOT 6 251 #define EOT 7 252 #define BOW 8 253 #define EOW 9 254 #define REF 10 255 #define CLO 11 256 257 #define END 0 258 259 /* 260 * The following defines are not meant to be changeable. 261 * They are for readability only. 262 */ 263 #define MAXCHR 128 264 #define CHRBIT 8 265 #define BITBLK MAXCHR/CHRBIT 266 #define BLKIND 0170 267 #define BITIND 07 268 269 #define ASCIIB 0177 270 271 #if defined( DOS ) || defined( _WIN32 ) || defined(SUN) 272 typedef unsigned char CHAR; 273 #else /* DOS */ 274 typedef /*unsigned*/ char CHAR; 275 #endif /* DOS */ 276 277 static int tagstk[MAXTAG]; /* subpat tag stack..*/ 278 static CHAR nfa[MAXNFA]; /* automaton.. */ 279 static int sta = NOP; /* status of lastpat */ 280 281 static CHAR bittab[BITBLK]; /* bit table for CCL */ 282 /* pre-set bits... */ 283 static CHAR bitarr[] = {1,2,4,8,16,32,64,128}; 284 285 #ifdef DEBUG 286 static void nfadump(CHAR *); 287 void symbolic(char *); 288 #endif 289 290 static void 291 chset(CHAR c) 292 { 293 bittab[((c) & BLKIND) >> 3] |= bitarr[(c) & BITIND]; 294 } 295 296 #define badpat(x) (*nfa = END, x) 297 #define store(x) *mp++ = x 298 299 char * 300 re_comp( char *pat ) 301 { 302 register char *p; /* pattern pointer */ 303 register CHAR *mp=nfa; /* nfa pointer */ 304 register CHAR *lp; /* saved pointer.. */ 305 register CHAR *sp=nfa; /* another one.. */ 306 307 register int tagi = 0; /* tag stack index */ 308 register int tagc = 1; /* actual tag count */ 309 310 register int n; 311 register CHAR mask; /* xor mask -CCL/NCL */ 312 int c1, c2; 313 314 if (!pat || !*pat) 315 if (sta) 316 return 0; 317 else 318 return badpat("No previous regular expression"); 319 sta = NOP; 320 321 for (p = pat; *p; p++) { 322 lp = mp; 323 switch(*p) { 324 325 case '.': /* match any char.. */ 326 store(ANY); 327 break; 328 329 case '^': /* match beginning.. */ 330 if (p == pat) 331 store(BOL); 332 else { 333 store(CHR); 334 store(*p); 335 } 336 break; 337 338 case '$': /* match endofline.. */ 339 if (!*(p+1)) 340 store(EOL); 341 else { 342 store(CHR); 343 store(*p); 344 } 345 break; 346 347 case '[': /* match char class..*/ 348 store(CCL); 349 350 if (*++p == '^') { 351 mask = 0377; 352 p++; 353 } 354 else 355 mask = 0; 356 357 if (*p == '-') /* real dash */ 358 chset(*p++); 359 if (*p == ']') /* real brac */ 360 chset(*p++); 361 while (*p && *p != ']') { 362 if (*p == '-' && *(p+1) && *(p+1) != ']') { 363 p++; 364 c1 = *(p-2) + 1; 365 c2 = *p++; 366 while (c1 <= c2) 367 chset((CHAR)c1++); 368 } 369 #ifdef EXTEND 370 else if (*p == '\\' && *(p+1)) { 371 p++; 372 chset(*p++); 373 } 374 #endif 375 else 376 chset(*p++); 377 } 378 if (!*p) 379 return badpat("Missing ]"); 380 381 for (n = 0; n < BITBLK; bittab[n++] = (char) 0) 382 store(mask ^ bittab[n]); 383 384 break; 385 386 case '*': /* match 0 or more.. */ 387 case '+': /* match 1 or more.. */ 388 if (p == pat) 389 return badpat("Empty closure"); 390 lp = sp; /* previous opcode */ 391 if (*lp == CLO) /* equivalence.. */ 392 break; 393 switch(*lp) { 394 395 case BOL: 396 case BOT: 397 case EOT: 398 case BOW: 399 case EOW: 400 case REF: 401 return badpat("Illegal closure"); 402 default: 403 break; 404 } 405 406 if (*p == '+') 407 for (sp = mp; lp < sp; lp++) 408 store(*lp); 409 410 store(END); 411 store(END); 412 sp = mp; 413 while (--mp > lp) 414 *mp = mp[-1]; 415 store(CLO); 416 mp = sp; 417 break; 418 419 case '\\': /* tags, backrefs .. */ 420 switch(*++p) { 421 422 case '(': 423 if (tagc < MAXTAG) { 424 tagstk[++tagi] = tagc; 425 store(BOT); 426 store(tagc++); 427 } 428 else 429 return badpat("Too many \\(\\) pairs"); 430 break; 431 case ')': 432 if (*sp == BOT) 433 return badpat("Null pattern inside \\(\\)"); 434 if (tagi > 0) { 435 store(EOT); 436 store(tagstk[tagi--]); 437 } 438 else 439 return badpat("Unmatched \\)"); 440 break; 441 case '<': 442 store(BOW); 443 break; 444 case '>': 445 if (*sp == BOW) 446 return badpat("Null pattern inside \\<\\>"); 447 store(EOW); 448 break; 449 case '1': 450 case '2': 451 case '3': 452 case '4': 453 case '5': 454 case '6': 455 case '7': 456 case '8': 457 case '9': 458 n = *p-'0'; 459 if (tagi > 0 && tagstk[tagi] == n) 460 return badpat("Cyclical reference"); 461 if (tagc > n) { 462 store(REF); 463 store(n); 464 } 465 else 466 return badpat("Undetermined reference"); 467 break; 468 #ifdef EXTEND 469 case 'b': 470 store(CHR); 471 store('\b'); 472 break; 473 case 'n': 474 store(CHR); 475 store('\n'); 476 break; 477 case 'f': 478 store(CHR); 479 store('\f'); 480 break; 481 case 'r': 482 store(CHR); 483 store('\r'); 484 break; 485 case 't': 486 store(CHR); 487 store('\t'); 488 break; 489 #endif 490 default: 491 store(CHR); 492 store(*p); 493 } 494 break; 495 496 default : /* an ordinary char */ 497 store(CHR); 498 store(*p); 499 break; 500 } 501 sp = lp; 502 } 503 if (tagi > 0) 504 return badpat("Unmatched \\("); 505 store(END); 506 sta = OKP; 507 return 0; 508 } 509 510 511 static char *bol; 512 char *bopat[MAXTAG]; 513 char *eopat[MAXTAG]; 514 #ifdef NEEDPROTOS 515 static char *pmatch( char *lp, CHAR *ap ); 516 #else /* NEEDPROTOS */ 517 static char *pmatch(); 518 #endif /* NEEDPROTOS */ 519 520 /* 521 * re_exec: 522 * execute nfa to find a match. 523 * 524 * special cases: (nfa[0]) 525 * BOL 526 * Match only once, starting from the 527 * beginning. 528 * CHR 529 * First locate the character without 530 * calling pmatch, and if found, call 531 * pmatch for the remaining string. 532 * END 533 * re_comp failed, poor luser did not 534 * check for it. Fail fast. 535 * 536 * If a match is found, bopat[0] and eopat[0] are set 537 * to the beginning and the end of the matched fragment, 538 * respectively. 539 * 540 */ 541 542 int 543 re_exec( char *lp ) 544 { 545 register char c; 546 register char *ep = 0; 547 register CHAR *ap = nfa; 548 549 bol = lp; 550 551 bopat[0] = 0; 552 bopat[1] = 0; 553 bopat[2] = 0; 554 bopat[3] = 0; 555 bopat[4] = 0; 556 bopat[5] = 0; 557 bopat[6] = 0; 558 bopat[7] = 0; 559 bopat[8] = 0; 560 bopat[9] = 0; 561 562 switch(*ap) { 563 564 case BOL: /* anchored: match from BOL only */ 565 ep = pmatch(lp,ap); 566 break; 567 case CHR: /* ordinary char: locate it fast */ 568 c = *(ap+1); 569 while (*lp && *lp != c) 570 lp++; 571 if (!*lp) /* if EOS, fail, else fall thru. */ 572 return 0; 573 default: /* regular matching all the way. */ 574 do { 575 if ((ep = pmatch(lp,ap))) 576 break; 577 lp++; 578 } while (*lp); 579 580 break; 581 case END: /* munged automaton. fail always */ 582 return 0; 583 } 584 if (!ep) 585 return 0; 586 587 bopat[0] = lp; 588 eopat[0] = ep; 589 return 1; 590 } 591 592 /* 593 * pmatch: internal routine for the hard part 594 * 595 * This code is partly snarfed from an early grep written by 596 * David Conroy. The backref and tag stuff, and various other 597 * innovations are by oz. 598 * 599 * special case optimizations: (nfa[n], nfa[n+1]) 600 * CLO ANY 601 * We KNOW .* will match everything upto the 602 * end of line. Thus, directly go to the end of 603 * line, without recursive pmatch calls. As in 604 * the other closure cases, the remaining pattern 605 * must be matched by moving backwards on the 606 * string recursively, to find a match for xy 607 * (x is ".*" and y is the remaining pattern) 608 * where the match satisfies the LONGEST match for 609 * x followed by a match for y. 610 * CLO CHR 611 * We can again scan the string forward for the 612 * single char and at the point of failure, we 613 * execute the remaining nfa recursively, same as 614 * above. 615 * 616 * At the end of a successful match, bopat[n] and eopat[n] 617 * are set to the beginning and end of subpatterns matched 618 * by tagged expressions (n = 1 to 9). 619 * 620 */ 621 622 #ifndef re_fail 623 extern void re_fail(); 624 #endif /* re_fail */ 625 626 /* 627 * character classification table for word boundary operators BOW 628 * and EOW. the reason for not using ctype macros is that we can 629 * let the user add into our own table. see re_modw. This table 630 * is not in the bitset form, since we may wish to extend it in the 631 * future for other character classifications. 632 * 633 * TRUE for 0-9 A-Z a-z _ 634 */ 635 static char chrtyp[MAXCHR] = { 636 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 639 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 640 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 641 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 642 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 643 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 644 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 645 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 646 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 647 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 648 1, 1, 1, 0, 0, 0, 0, 0 649 }; 650 651 #define inascii(x) (0177&(x)) 652 #define iswordc(x) chrtyp[inascii(x)] 653 #define isinset(x,y) ((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND]) 654 655 /* 656 * skip values for CLO XXX to skip past the closure 657 */ 658 659 #define ANYSKIP 2 /* [CLO] ANY END ... */ 660 #define CHRSKIP 3 /* [CLO] CHR chr END ... */ 661 #define CCLSKIP 18 /* [CLO] CCL 16bytes END ... */ 662 663 static char * 664 pmatch( char *lp, CHAR *ap) 665 { 666 register int op, c, n; 667 register char *e; /* extra pointer for CLO */ 668 register char *bp; /* beginning of subpat.. */ 669 register char *ep; /* ending of subpat.. */ 670 char *are; /* to save the line ptr. */ 671 672 while ((op = *ap++) != END) 673 switch(op) { 674 675 case CHR: 676 if (*lp++ != *ap++) 677 return 0; 678 break; 679 case ANY: 680 if (!*lp++) 681 return 0; 682 break; 683 case CCL: 684 c = *lp++; 685 if (!isinset(ap,c)) 686 return 0; 687 ap += BITBLK; 688 break; 689 case BOL: 690 if (lp != bol) 691 return 0; 692 break; 693 case EOL: 694 if (*lp) 695 return 0; 696 break; 697 case BOT: 698 bopat[*ap++] = lp; 699 break; 700 case EOT: 701 eopat[*ap++] = lp; 702 break; 703 case BOW: 704 if (lp!=bol && iswordc(lp[-1]) || !iswordc(*lp)) 705 return 0; 706 break; 707 case EOW: 708 if (lp==bol || !iswordc(lp[-1]) || iswordc(*lp)) 709 return 0; 710 break; 711 case REF: 712 n = *ap++; 713 bp = bopat[n]; 714 ep = eopat[n]; 715 while (bp < ep) 716 if (*bp++ != *lp++) 717 return 0; 718 break; 719 case CLO: 720 are = lp; 721 switch(*ap) { 722 723 case ANY: 724 while (*lp) 725 lp++; 726 n = ANYSKIP; 727 break; 728 case CHR: 729 c = *(ap+1); 730 while (*lp && c == *lp) 731 lp++; 732 n = CHRSKIP; 733 break; 734 case CCL: 735 while ((c = *lp) && isinset(ap+1,c)) 736 lp++; 737 n = CCLSKIP; 738 break; 739 default: 740 re_fail("closure: bad nfa.", *ap); 741 return 0; 742 } 743 744 ap += n; 745 746 while (lp >= are) { 747 if (e = pmatch(lp, ap)) 748 return e; 749 --lp; 750 } 751 return 0; 752 default: 753 re_fail("re_exec: bad nfa.", op); 754 return 0; 755 } 756 return lp; 757 } 758 759 /* 760 * re_modw: 761 * add new characters into the word table to change re_exec's 762 * understanding of what a word should look like. Note that we 763 * only accept additions into the word definition. 764 * 765 * If the string parameter is 0 or null string, the table is 766 * reset back to the default containing A-Z a-z 0-9 _. [We use 767 * the compact bitset representation for the default table] 768 */ 769 770 static CHAR deftab[16] = { 771 0, 0, 0, 0, 0, 0, 0377, 003, 0376, 0377, 0377, 0207, 772 0376, 0377, 0377, 007 773 }; 774 775 void 776 re_modw( char *s ) 777 { 778 register int i; 779 780 if (!s || !*s) { 781 for (i = 0; i < MAXCHR; i++) 782 if (!isinset(deftab,i)) 783 iswordc(i) = 0; 784 } 785 else 786 while(*s) 787 iswordc(*s++) = 1; 788 } 789 790 /* 791 * re_subs: 792 * substitute the matched portions of the src in dst. 793 * 794 * & substitute the entire matched pattern. 795 * 796 * \digit substitute a subpattern, with the given tag number. 797 * Tags are numbered from 1 to 9. If the particular 798 * tagged subpattern does not exist, null is substituted. 799 */ 800 int 801 re_subs( char *src, char *dst) 802 { 803 register char c; 804 register int pin; 805 register char *bp; 806 register char *ep; 807 808 if (!*src || !bopat[0]) 809 return 0; 810 811 while (c = *src++) { 812 switch(c) { 813 814 case '&': 815 pin = 0; 816 break; 817 818 case '\\': 819 c = *src++; 820 if (c >= '0' && c <= '9') { 821 pin = c - '0'; 822 break; 823 } 824 825 default: 826 *dst++ = c; 827 continue; 828 } 829 830 if ((bp = bopat[pin]) && (ep = eopat[pin])) { 831 while (*bp && bp < ep) 832 *dst++ = *bp++; 833 if (bp < ep) 834 return 0; 835 } 836 } 837 *dst = (char) 0; 838 return 1; 839 } 840 841 #ifdef DEBUG 842 /* 843 * symbolic - produce a symbolic dump of the nfa 844 */ 845 void 846 symbolic( char *s ) 847 { 848 (void) printf("pattern: %s\n", s); 849 (void) printf("nfacode:\n"); 850 nfadump(nfa); 851 } 852 853 static void 854 nfadump( CHAR *ap) 855 { 856 register int n; 857 858 while (*ap != END) 859 switch(*ap++) { 860 case CLO: 861 (void) printf("CLOSURE"); 862 nfadump(ap); 863 switch(*ap) { 864 case CHR: 865 n = CHRSKIP; 866 break; 867 case ANY: 868 n = ANYSKIP; 869 break; 870 case CCL: 871 n = CCLSKIP; 872 break; 873 } 874 ap += n; 875 break; 876 case CHR: 877 (void) printf("\tCHR %c\n",*ap++); 878 break; 879 case ANY: 880 (void) printf("\tANY .\n"); 881 break; 882 case BOL: 883 (void) printf("\tBOL -\n"); 884 break; 885 case EOL: 886 (void) printf("\tEOL -\n"); 887 break; 888 case BOT: 889 (void) printf("BOT: %d\n",*ap++); 890 break; 891 case EOT: 892 (void) printf("EOT: %d\n",*ap++); 893 break; 894 case BOW: 895 (void) printf("BOW\n"); 896 break; 897 case EOW: 898 (void) printf("EOW\n"); 899 break; 900 case REF: 901 (void) printf("REF: %d\n",*ap++); 902 break; 903 case CCL: 904 (void) printf("\tCCL ["); 905 for (n = 0; n < MAXCHR; n++) 906 if (isinset(ap,(CHAR)n)) { 907 if (n < ' ') 908 (void) printf("^%c", n ^ 0x040); 909 else 910 (void) printf("%c", n); 911 } 912 (void) printf("]\n"); 913 ap += BITBLK; 914 break; 915 default: 916 (void) printf("bad nfa. opcode %o\n", ap[-1]); 917 exit(1); 918 break; 919 } 920 } 921 #endif 922 #endif /* MACOS or DOS or NEED_BSDREGEX */ 923