1 /*
2 *
3 * Portions Copyright 1998 Sun Microsystems, Inc. All rights reserved.
4 * Use is subject to license terms.
5 *
6 */
7
8 #pragma ident "%Z%%M% %I% %E% SMI"
9
10 #include "portable.h"
11 #include "stdio.h"
12 #include "stdlib.h"
13
14 #if defined( MACOS ) || defined( DOS ) || defined( _WIN32 ) || defined( NEED_BSDREGEX )
15 #include "regex.h"
16
17 /*
18 * regex - Regular expression pattern matching and replacement
19 *
20 * By: Ozan S. Yigit (oz)
21 * Dept. of Computer Science
22 * York University
23 *
24 * These routines are the PUBLIC DOMAIN equivalents of regex
25 * routines as found in 4.nBSD UN*X, with minor extensions.
26 *
27 * These routines are derived from various implementations found
28 * in software tools books, and Conroy's grep. They are NOT derived
29 * from licensed/restricted software.
30 * For more interesting/academic/complicated implementations,
31 * see Henry Spencer's regexp routines, or GNU Emacs pattern
32 * matching module.
33 *
34 * Modification history:
35 *
36 * $Log: regex.c,v $
37 * Revision 1.12 1996/04/25 16:20:59 mcs
38 * make re_exec() match "" with ".*" and similar patterns
39 * hopefully this change doesn't break anything else!
40 *
41 * Revision 1.11 1994/12/14 21:33:45 mcs
42 * use new NEED_BSDREGEX
43 * fix pmatch() prototype
44 *
45 * Revision 1.10 1994/12/12 18:16:39 mcs
46 * use on NetBSD
47 *
48 * Revision 1.9 1994/11/15 19:16:35 mcs
49 * add (CHAR) cast to make VisualC++ happy
50 *
51 * Revision 1.8 1994/11/08 21:14:32 mcs
52 * WIN32 changes
53 *
54 * Revision 1.7 1994/07/23 19:51:24 mcs
55 * use ANSI-style inline function parameters
56 *
57 * Revision 1.6 1993/10/18 01:52:32 tim
58 * include for VMS
59 *
60 * Revision 1.5 1993/09/28 21:37:54 mcs
61 * HP/UX needs the regex we include (not in its libc)
62 *
63 * Revision 1.4 1993/08/27 15:59:52 mcs
64 * use CHAR for deftab
65 *
66 * Revision 1.3 1993/08/27 15:49:47 mcs
67 * added missing 0 to octal constants
68 * use unsigned char for CHAR under DOS
69 *
70 * Revision 1.2 1993/08/27 14:57:48 mcs
71 * add proto. for pmatch
72 *
73 * Revision 1.1 1993/08/18 21:20:02 mcs
74 * Initial revision
75 *
76 * Revision 1.4 1991/10/17 03:56:42 oz
77 * miscellaneous changes, small cleanups etc.
78 *
79 * Revision 1.3 1989/04/01 14:18:09 oz
80 * Change all references to a dfa: this is actually an nfa.
81 *
82 * Revision 1.2 88/08/28 15:36:04 oz
83 * Use a complement bitmap to represent NCL.
84 * This removes the need to have seperate
85 * code in the pmatch case block - it is
86 * just CCL code now.
87 *
88 * Use the actual CCL code in the CLO
89 * section of pmatch. No need for a recursive
90 * pmatch call.
91 *
92 * Use a bitmap table to set char bits in an
93 * 8-bit chunk.
94 *
95 * Interfaces:
96 * re_comp: compile a regular expression into a NFA.
97 *
98 * char *re_comp(s)
99 * char *s;
100 *
101 * re_exec: execute the NFA to match a pattern.
102 *
103 * int re_exec(s)
104 * char *s;
105 *
106 * re_modw change re_exec's understanding of what a "word"
107 * looks like (for \< and \>) by adding into the
108 * hidden word-syntax table.
109 *
110 * void re_modw(s)
111 * char *s;
112 *
113 * re_subs: substitute the matched portions in a new string.
114 *
115 * int re_subs(src, dst)
116 * char *src;
117 * char *dst;
118 *
119 * re_fail: failure routine for re_exec.
120 *
121 * void re_fail(msg, op)
122 * char *msg;
123 * char op;
124 *
125 * Regular Expressions:
126 *
127 * [1] char matches itself, unless it is a special
128 * character (metachar): . \ [ ] * + ^ $
129 *
130 * [2] . matches any character.
131 *
132 * [3] \ matches the character following it, except
133 * when followed by a left or right round bracket,
134 * a digit 1 to 9 or a left or right angle bracket.
135 * (see [7], [8] and [9])
136 * It is used as an escape character for all
137 * other meta-characters, and itself. When used
138 * in a set ([4]), it is treated as an ordinary
139 * character.
140 *
141 * [4] [set] matches one of the characters in the set.
142 * If the first character in the set is "^",
143 * it matches a character NOT in the set, i.e.
144 * complements the set. A shorthand S-E is
145 * used to specify a set of characters S upto
146 * E, inclusive. The special characters "]" and
147 * "-" have no special meaning if they appear
148 * as the first chars in the set.
149 * examples: match:
150 *
151 * [a-z] any lowercase alpha
152 *
153 * [^]-] any char except ] and -
154 *
155 * [^A-Z] any char except uppercase
156 * alpha
157 *
158 * [a-zA-Z] any alpha
159 *
160 * [5] * any regular expression form [1] to [4], followed by
161 * closure char (*) matches zero or more matches of
162 * that form.
163 *
164 * [6] + same as [5], except it matches one or more.
165 *
166 * [7] a regular expression in the form [1] to [10], enclosed
167 * as \(form\) matches what form matches. The enclosure
168 * creates a set of tags, used for [8] and for
169 * pattern substution. The tagged forms are numbered
170 * starting from 1.
171 *
172 * [8] a \ followed by a digit 1 to 9 matches whatever a
173 * previously tagged regular expression ([7]) matched.
174 *
175 * [9] \< a regular expression starting with a \< construct
176 * \> and/or ending with a \> construct, restricts the
177 * pattern matching to the beginning of a word, and/or
178 * the end of a word. A word is defined to be a character
179 * string beginning and/or ending with the characters
180 * A-Z a-z 0-9 and _. It must also be preceded and/or
181 * followed by any character outside those mentioned.
182 *
183 * [10] a composite regular expression xy where x and y
184 * are in the form [1] to [10] matches the longest
185 * match of x followed by a match for y.
186 *
187 * [11] ^ a regular expression starting with a ^ character
188 * $ and/or ending with a $ character, restricts the
189 * pattern matching to the beginning of the line,
190 * or the end of line. [anchors] Elsewhere in the
191 * pattern, ^ and $ are treated as ordinary characters.
192 *
193 *
194 * Acknowledgements:
195 *
196 * HCR's Hugh Redelmeier has been most helpful in various
197 * stages of development. He convinced me to include BOW
198 * and EOW constructs, originally invented by Rob Pike at
199 * the University of Toronto.
200 *
201 * References:
202 * Software tools Kernighan & Plauger
203 * Software tools in Pascal Kernighan & Plauger
204 * Grep [rsx-11 C dist] David Conroy
205 * ed - text editor Un*x Programmer's Manual
206 * Advanced editing on Un*x B. W. Kernighan
207 * RegExp routines Henry Spencer
208 *
209 * Notes:
210 *
211 * This implementation uses a bit-set representation for character
212 * classes for speed and compactness. Each character is represented
213 * by one bit in a 128-bit block. Thus, CCL always takes a
214 * constant 16 bytes in the internal nfa, and re_exec does a single
215 * bit comparison to locate the character in the set.
216 *
217 * Examples:
218 *
219 * pattern: foo*.*
220 * compile: CHR f CHR o CLO CHR o END CLO ANY END END
221 * matches: fo foo fooo foobar fobar foxx ...
222 *
223 * pattern: fo[ob]a[rz]
224 * compile: CHR f CHR o CCL bitset CHR a CCL bitset END
225 * matches: fobar fooar fobaz fooaz
226 *
227 * pattern: foo\\+
228 * compile: CHR f CHR o CHR o CHR \ CLO CHR \ END END
229 * matches: foo\ foo\\ foo\\\ ...
230 *
231 * pattern: \(foo\)[1-3]\1 (same as foo[1-3]foo)
232 * compile: BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END
233 * matches: foo1foo foo2foo foo3foo
234 *
235 * pattern: \(fo.*\)-\1
236 * compile: BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END
237 * matches: foo-foo fo-fo fob-fob foobar-foobar ...
238 */
239
240 #define MAXNFA 1024
241 #define MAXTAG 10
242
243 #define OKP 1
244 #define NOP 0
245
246 #define CHR 1
247 #define ANY 2
248 #define CCL 3
249 #define BOL 4
250 #define EOL 5
251 #define BOT 6
252 #define EOT 7
253 #define BOW 8
254 #define EOW 9
255 #define REF 10
256 #define CLO 11
257
258 #define END 0
259
260 /*
261 * The following defines are not meant to be changeable.
262 * They are for readability only.
263 */
264 #define MAXCHR 128
265 #define CHRBIT 8
266 #define BITBLK MAXCHR/CHRBIT
267 #define BLKIND 0170
268 #define BITIND 07
269
270 #define ASCIIB 0177
271
272 #if defined( DOS ) || defined( _WIN32 ) || defined(SUN)
273 typedef unsigned char CHAR;
274 #else /* DOS */
275 typedef /*unsigned*/ char CHAR;
276 #endif /* DOS */
277
278 static int tagstk[MAXTAG]; /* subpat tag stack..*/
279 static CHAR nfa[MAXNFA]; /* automaton.. */
280 static int sta = NOP; /* status of lastpat */
281
282 static CHAR bittab[BITBLK]; /* bit table for CCL */
283 /* pre-set bits... */
284 static CHAR bitarr[] = {1,2,4,8,16,32,64,128};
285
286 #ifdef DEBUG
287 static void nfadump(CHAR *);
288 void symbolic(char *);
289 #endif
290
291 static void
chset(CHAR c)292 chset(CHAR c)
293 {
294 bittab[((c) & BLKIND) >> 3] |= bitarr[(c) & BITIND];
295 }
296
297 #define badpat(x) (*nfa = END, x)
298 #define store(x) *mp++ = x
299
300 char *
re_comp(char * pat)301 re_comp( char *pat )
302 {
303 register char *p; /* pattern pointer */
304 register CHAR *mp=nfa; /* nfa pointer */
305 register CHAR *lp; /* saved pointer.. */
306 register CHAR *sp=nfa; /* another one.. */
307
308 register int tagi = 0; /* tag stack index */
309 register int tagc = 1; /* actual tag count */
310
311 register int n;
312 register CHAR mask; /* xor mask -CCL/NCL */
313 int c1, c2;
314
315 if (!pat || !*pat)
316 if (sta)
317 return 0;
318 else
319 return badpat("No previous regular expression");
320 sta = NOP;
321
322 for (p = pat; *p; p++) {
323 lp = mp;
324 switch(*p) {
325
326 case '.': /* match any char.. */
327 store(ANY);
328 break;
329
330 case '^': /* match beginning.. */
331 if (p == pat)
332 store(BOL);
333 else {
334 store(CHR);
335 store(*p);
336 }
337 break;
338
339 case '$': /* match endofline.. */
340 if (!*(p+1))
341 store(EOL);
342 else {
343 store(CHR);
344 store(*p);
345 }
346 break;
347
348 case '[': /* match char class..*/
349 store(CCL);
350
351 if (*++p == '^') {
352 mask = 0377;
353 p++;
354 }
355 else
356 mask = 0;
357
358 if (*p == '-') /* real dash */
359 chset(*p++);
360 if (*p == ']') /* real brac */
361 chset(*p++);
362 while (*p && *p != ']') {
363 if (*p == '-' && *(p+1) && *(p+1) != ']') {
364 p++;
365 c1 = *(p-2) + 1;
366 c2 = *p++;
367 while (c1 <= c2)
368 chset((CHAR)c1++);
369 }
370 #ifdef EXTEND
371 else if (*p == '\\' && *(p+1)) {
372 p++;
373 chset(*p++);
374 }
375 #endif
376 else
377 chset(*p++);
378 }
379 if (!*p)
380 return badpat("Missing ]");
381
382 for (n = 0; n < BITBLK; bittab[n++] = (char) 0)
383 store(mask ^ bittab[n]);
384
385 break;
386
387 case '*': /* match 0 or more.. */
388 case '+': /* match 1 or more.. */
389 if (p == pat)
390 return badpat("Empty closure");
391 lp = sp; /* previous opcode */
392 if (*lp == CLO) /* equivalence.. */
393 break;
394 switch(*lp) {
395
396 case BOL:
397 case BOT:
398 case EOT:
399 case BOW:
400 case EOW:
401 case REF:
402 return badpat("Illegal closure");
403 default:
404 break;
405 }
406
407 if (*p == '+')
408 for (sp = mp; lp < sp; lp++)
409 store(*lp);
410
411 store(END);
412 store(END);
413 sp = mp;
414 while (--mp > lp)
415 *mp = mp[-1];
416 store(CLO);
417 mp = sp;
418 break;
419
420 case '\\': /* tags, backrefs .. */
421 switch(*++p) {
422
423 case '(':
424 if (tagc < MAXTAG) {
425 tagstk[++tagi] = tagc;
426 store(BOT);
427 store(tagc++);
428 }
429 else
430 return badpat("Too many \\(\\) pairs");
431 break;
432 case ')':
433 if (*sp == BOT)
434 return badpat("Null pattern inside \\(\\)");
435 if (tagi > 0) {
436 store(EOT);
437 store(tagstk[tagi--]);
438 }
439 else
440 return badpat("Unmatched \\)");
441 break;
442 case '<':
443 store(BOW);
444 break;
445 case '>':
446 if (*sp == BOW)
447 return badpat("Null pattern inside \\<\\>");
448 store(EOW);
449 break;
450 case '1':
451 case '2':
452 case '3':
453 case '4':
454 case '5':
455 case '6':
456 case '7':
457 case '8':
458 case '9':
459 n = *p-'0';
460 if (tagi > 0 && tagstk[tagi] == n)
461 return badpat("Cyclical reference");
462 if (tagc > n) {
463 store(REF);
464 store(n);
465 }
466 else
467 return badpat("Undetermined reference");
468 break;
469 #ifdef EXTEND
470 case 'b':
471 store(CHR);
472 store('\b');
473 break;
474 case 'n':
475 store(CHR);
476 store('\n');
477 break;
478 case 'f':
479 store(CHR);
480 store('\f');
481 break;
482 case 'r':
483 store(CHR);
484 store('\r');
485 break;
486 case 't':
487 store(CHR);
488 store('\t');
489 break;
490 #endif
491 default:
492 store(CHR);
493 store(*p);
494 }
495 break;
496
497 default : /* an ordinary char */
498 store(CHR);
499 store(*p);
500 break;
501 }
502 sp = lp;
503 }
504 if (tagi > 0)
505 return badpat("Unmatched \\(");
506 store(END);
507 sta = OKP;
508 return 0;
509 }
510
511
512 static char *bol;
513 char *bopat[MAXTAG];
514 char *eopat[MAXTAG];
515 #ifdef NEEDPROTOS
516 static char *pmatch( char *lp, CHAR *ap );
517 #else /* NEEDPROTOS */
518 static char *pmatch();
519 #endif /* NEEDPROTOS */
520
521 /*
522 * re_exec:
523 * execute nfa to find a match.
524 *
525 * special cases: (nfa[0])
526 * BOL
527 * Match only once, starting from the
528 * beginning.
529 * CHR
530 * First locate the character without
531 * calling pmatch, and if found, call
532 * pmatch for the remaining string.
533 * END
534 * re_comp failed, poor luser did not
535 * check for it. Fail fast.
536 *
537 * If a match is found, bopat[0] and eopat[0] are set
538 * to the beginning and the end of the matched fragment,
539 * respectively.
540 *
541 */
542
543 int
re_exec(char * lp)544 re_exec( char *lp )
545 {
546 register char c;
547 register char *ep = 0;
548 register CHAR *ap = nfa;
549
550 bol = lp;
551
552 bopat[0] = 0;
553 bopat[1] = 0;
554 bopat[2] = 0;
555 bopat[3] = 0;
556 bopat[4] = 0;
557 bopat[5] = 0;
558 bopat[6] = 0;
559 bopat[7] = 0;
560 bopat[8] = 0;
561 bopat[9] = 0;
562
563 switch(*ap) {
564
565 case BOL: /* anchored: match from BOL only */
566 ep = pmatch(lp,ap);
567 break;
568 case CHR: /* ordinary char: locate it fast */
569 c = *(ap+1);
570 while (*lp && *lp != c)
571 lp++;
572 if (!*lp) /* if EOS, fail, else fall thru. */
573 return 0;
574 default: /* regular matching all the way. */
575 do {
576 if ((ep = pmatch(lp,ap)))
577 break;
578 lp++;
579 } while (*lp);
580
581 break;
582 case END: /* munged automaton. fail always */
583 return 0;
584 }
585 if (!ep)
586 return 0;
587
588 bopat[0] = lp;
589 eopat[0] = ep;
590 return 1;
591 }
592
593 /*
594 * pmatch: internal routine for the hard part
595 *
596 * This code is partly snarfed from an early grep written by
597 * David Conroy. The backref and tag stuff, and various other
598 * innovations are by oz.
599 *
600 * special case optimizations: (nfa[n], nfa[n+1])
601 * CLO ANY
602 * We KNOW .* will match everything upto the
603 * end of line. Thus, directly go to the end of
604 * line, without recursive pmatch calls. As in
605 * the other closure cases, the remaining pattern
606 * must be matched by moving backwards on the
607 * string recursively, to find a match for xy
608 * (x is ".*" and y is the remaining pattern)
609 * where the match satisfies the LONGEST match for
610 * x followed by a match for y.
611 * CLO CHR
612 * We can again scan the string forward for the
613 * single char and at the point of failure, we
614 * execute the remaining nfa recursively, same as
615 * above.
616 *
617 * At the end of a successful match, bopat[n] and eopat[n]
618 * are set to the beginning and end of subpatterns matched
619 * by tagged expressions (n = 1 to 9).
620 *
621 */
622
623 #ifndef re_fail
624 extern void re_fail();
625 #endif /* re_fail */
626
627 /*
628 * character classification table for word boundary operators BOW
629 * and EOW. the reason for not using ctype macros is that we can
630 * let the user add into our own table. see re_modw. This table
631 * is not in the bitset form, since we may wish to extend it in the
632 * future for other character classifications.
633 *
634 * TRUE for 0-9 A-Z a-z _
635 */
636 static char chrtyp[MAXCHR] = {
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
639 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
640 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
641 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
642 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
643 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
644 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
645 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
646 1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
647 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
648 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
649 1, 1, 1, 0, 0, 0, 0, 0
650 };
651
652 #define inascii(x) (0177&(x))
653 #define iswordc(x) chrtyp[inascii(x)]
654 #define isinset(x,y) ((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND])
655
656 /*
657 * skip values for CLO XXX to skip past the closure
658 */
659
660 #define ANYSKIP 2 /* [CLO] ANY END ... */
661 #define CHRSKIP 3 /* [CLO] CHR chr END ... */
662 #define CCLSKIP 18 /* [CLO] CCL 16bytes END ... */
663
664 static char *
pmatch(char * lp,CHAR * ap)665 pmatch( char *lp, CHAR *ap)
666 {
667 register int op, c, n;
668 register char *e; /* extra pointer for CLO */
669 register char *bp; /* beginning of subpat.. */
670 register char *ep; /* ending of subpat.. */
671 char *are; /* to save the line ptr. */
672
673 while ((op = *ap++) != END)
674 switch(op) {
675
676 case CHR:
677 if (*lp++ != *ap++)
678 return 0;
679 break;
680 case ANY:
681 if (!*lp++)
682 return 0;
683 break;
684 case CCL:
685 c = *lp++;
686 if (!isinset(ap,c))
687 return 0;
688 ap += BITBLK;
689 break;
690 case BOL:
691 if (lp != bol)
692 return 0;
693 break;
694 case EOL:
695 if (*lp)
696 return 0;
697 break;
698 case BOT:
699 bopat[*ap++] = lp;
700 break;
701 case EOT:
702 eopat[*ap++] = lp;
703 break;
704 case BOW:
705 if (lp!=bol && iswordc(lp[-1]) || !iswordc(*lp))
706 return 0;
707 break;
708 case EOW:
709 if (lp==bol || !iswordc(lp[-1]) || iswordc(*lp))
710 return 0;
711 break;
712 case REF:
713 n = *ap++;
714 bp = bopat[n];
715 ep = eopat[n];
716 while (bp < ep)
717 if (*bp++ != *lp++)
718 return 0;
719 break;
720 case CLO:
721 are = lp;
722 switch(*ap) {
723
724 case ANY:
725 while (*lp)
726 lp++;
727 n = ANYSKIP;
728 break;
729 case CHR:
730 c = *(ap+1);
731 while (*lp && c == *lp)
732 lp++;
733 n = CHRSKIP;
734 break;
735 case CCL:
736 while ((c = *lp) && isinset(ap+1,c))
737 lp++;
738 n = CCLSKIP;
739 break;
740 default:
741 re_fail("closure: bad nfa.", *ap);
742 return 0;
743 }
744
745 ap += n;
746
747 while (lp >= are) {
748 if (e = pmatch(lp, ap))
749 return e;
750 --lp;
751 }
752 return 0;
753 default:
754 re_fail("re_exec: bad nfa.", op);
755 return 0;
756 }
757 return lp;
758 }
759
760 /*
761 * re_modw:
762 * add new characters into the word table to change re_exec's
763 * understanding of what a word should look like. Note that we
764 * only accept additions into the word definition.
765 *
766 * If the string parameter is 0 or null string, the table is
767 * reset back to the default containing A-Z a-z 0-9 _. [We use
768 * the compact bitset representation for the default table]
769 */
770
771 static CHAR deftab[16] = {
772 0, 0, 0, 0, 0, 0, 0377, 003, 0376, 0377, 0377, 0207,
773 0376, 0377, 0377, 007
774 };
775
776 void
re_modw(char * s)777 re_modw( char *s )
778 {
779 register int i;
780
781 if (!s || !*s) {
782 for (i = 0; i < MAXCHR; i++)
783 if (!isinset(deftab,i))
784 iswordc(i) = 0;
785 }
786 else
787 while(*s)
788 iswordc(*s++) = 1;
789 }
790
791 /*
792 * re_subs:
793 * substitute the matched portions of the src in dst.
794 *
795 * & substitute the entire matched pattern.
796 *
797 * \digit substitute a subpattern, with the given tag number.
798 * Tags are numbered from 1 to 9. If the particular
799 * tagged subpattern does not exist, null is substituted.
800 */
801 int
re_subs(char * src,char * dst)802 re_subs( char *src, char *dst)
803 {
804 register char c;
805 register int pin;
806 register char *bp;
807 register char *ep;
808
809 if (!*src || !bopat[0])
810 return 0;
811
812 while (c = *src++) {
813 switch(c) {
814
815 case '&':
816 pin = 0;
817 break;
818
819 case '\\':
820 c = *src++;
821 if (c >= '0' && c <= '9') {
822 pin = c - '0';
823 break;
824 }
825
826 default:
827 *dst++ = c;
828 continue;
829 }
830
831 if ((bp = bopat[pin]) && (ep = eopat[pin])) {
832 while (*bp && bp < ep)
833 *dst++ = *bp++;
834 if (bp < ep)
835 return 0;
836 }
837 }
838 *dst = (char) 0;
839 return 1;
840 }
841
842 #ifdef DEBUG
843 /*
844 * symbolic - produce a symbolic dump of the nfa
845 */
846 void
symbolic(char * s)847 symbolic( char *s )
848 {
849 (void) printf("pattern: %s\n", s);
850 (void) printf("nfacode:\n");
851 nfadump(nfa);
852 }
853
854 static void
nfadump(CHAR * ap)855 nfadump( CHAR *ap)
856 {
857 register int n;
858
859 while (*ap != END)
860 switch(*ap++) {
861 case CLO:
862 (void) printf("CLOSURE");
863 nfadump(ap);
864 switch(*ap) {
865 case CHR:
866 n = CHRSKIP;
867 break;
868 case ANY:
869 n = ANYSKIP;
870 break;
871 case CCL:
872 n = CCLSKIP;
873 break;
874 }
875 ap += n;
876 break;
877 case CHR:
878 (void) printf("\tCHR %c\n",*ap++);
879 break;
880 case ANY:
881 (void) printf("\tANY .\n");
882 break;
883 case BOL:
884 (void) printf("\tBOL -\n");
885 break;
886 case EOL:
887 (void) printf("\tEOL -\n");
888 break;
889 case BOT:
890 (void) printf("BOT: %d\n",*ap++);
891 break;
892 case EOT:
893 (void) printf("EOT: %d\n",*ap++);
894 break;
895 case BOW:
896 (void) printf("BOW\n");
897 break;
898 case EOW:
899 (void) printf("EOW\n");
900 break;
901 case REF:
902 (void) printf("REF: %d\n",*ap++);
903 break;
904 case CCL:
905 (void) printf("\tCCL [");
906 for (n = 0; n < MAXCHR; n++)
907 if (isinset(ap,(CHAR)n)) {
908 if (n < ' ')
909 (void) printf("^%c", n ^ 0x040);
910 else
911 (void) printf("%c", n);
912 }
913 (void) printf("]\n");
914 ap += BITBLK;
915 break;
916 default:
917 (void) printf("bad nfa. opcode %o\n", ap[-1]);
918 exit(1);
919 break;
920 }
921 }
922 #endif
923 #endif /* MACOS or DOS or NEED_BSDREGEX */
924