10Sstevel@tonic-gate /*
20Sstevel@tonic-gate *
3*3857Sstevel * Portions Copyright 1998 Sun Microsystems, Inc. All rights reserved.
4*3857Sstevel * Use is subject to license terms.
50Sstevel@tonic-gate *
60Sstevel@tonic-gate */
70Sstevel@tonic-gate
80Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
90Sstevel@tonic-gate
100Sstevel@tonic-gate #include "portable.h"
110Sstevel@tonic-gate #include "stdio.h"
120Sstevel@tonic-gate #include "stdlib.h"
130Sstevel@tonic-gate
140Sstevel@tonic-gate #if defined( MACOS ) || defined( DOS ) || defined( _WIN32 ) || defined( NEED_BSDREGEX )
150Sstevel@tonic-gate #include "regex.h"
160Sstevel@tonic-gate
170Sstevel@tonic-gate /*
180Sstevel@tonic-gate * regex - Regular expression pattern matching and replacement
190Sstevel@tonic-gate *
200Sstevel@tonic-gate * By: Ozan S. Yigit (oz)
210Sstevel@tonic-gate * Dept. of Computer Science
220Sstevel@tonic-gate * York University
230Sstevel@tonic-gate *
240Sstevel@tonic-gate * These routines are the PUBLIC DOMAIN equivalents of regex
250Sstevel@tonic-gate * routines as found in 4.nBSD UN*X, with minor extensions.
260Sstevel@tonic-gate *
270Sstevel@tonic-gate * These routines are derived from various implementations found
280Sstevel@tonic-gate * in software tools books, and Conroy's grep. They are NOT derived
290Sstevel@tonic-gate * from licensed/restricted software.
300Sstevel@tonic-gate * For more interesting/academic/complicated implementations,
310Sstevel@tonic-gate * see Henry Spencer's regexp routines, or GNU Emacs pattern
320Sstevel@tonic-gate * matching module.
330Sstevel@tonic-gate *
340Sstevel@tonic-gate * Modification history:
350Sstevel@tonic-gate *
360Sstevel@tonic-gate * $Log: regex.c,v $
370Sstevel@tonic-gate * Revision 1.12 1996/04/25 16:20:59 mcs
380Sstevel@tonic-gate * make re_exec() match "" with ".*" and similar patterns
390Sstevel@tonic-gate * hopefully this change doesn't break anything else!
400Sstevel@tonic-gate *
410Sstevel@tonic-gate * Revision 1.11 1994/12/14 21:33:45 mcs
420Sstevel@tonic-gate * use new NEED_BSDREGEX
430Sstevel@tonic-gate * fix pmatch() prototype
440Sstevel@tonic-gate *
450Sstevel@tonic-gate * Revision 1.10 1994/12/12 18:16:39 mcs
460Sstevel@tonic-gate * use on NetBSD
470Sstevel@tonic-gate *
480Sstevel@tonic-gate * Revision 1.9 1994/11/15 19:16:35 mcs
490Sstevel@tonic-gate * add (CHAR) cast to make VisualC++ happy
500Sstevel@tonic-gate *
510Sstevel@tonic-gate * Revision 1.8 1994/11/08 21:14:32 mcs
520Sstevel@tonic-gate * WIN32 changes
530Sstevel@tonic-gate *
540Sstevel@tonic-gate * Revision 1.7 1994/07/23 19:51:24 mcs
550Sstevel@tonic-gate * use ANSI-style inline function parameters
560Sstevel@tonic-gate *
570Sstevel@tonic-gate * Revision 1.6 1993/10/18 01:52:32 tim
580Sstevel@tonic-gate * include for VMS
590Sstevel@tonic-gate *
600Sstevel@tonic-gate * Revision 1.5 1993/09/28 21:37:54 mcs
610Sstevel@tonic-gate * HP/UX needs the regex we include (not in its libc)
620Sstevel@tonic-gate *
630Sstevel@tonic-gate * Revision 1.4 1993/08/27 15:59:52 mcs
640Sstevel@tonic-gate * use CHAR for deftab
650Sstevel@tonic-gate *
660Sstevel@tonic-gate * Revision 1.3 1993/08/27 15:49:47 mcs
670Sstevel@tonic-gate * added missing 0 to octal constants
680Sstevel@tonic-gate * use unsigned char for CHAR under DOS
690Sstevel@tonic-gate *
700Sstevel@tonic-gate * Revision 1.2 1993/08/27 14:57:48 mcs
710Sstevel@tonic-gate * add proto. for pmatch
720Sstevel@tonic-gate *
730Sstevel@tonic-gate * Revision 1.1 1993/08/18 21:20:02 mcs
740Sstevel@tonic-gate * Initial revision
750Sstevel@tonic-gate *
760Sstevel@tonic-gate * Revision 1.4 1991/10/17 03:56:42 oz
770Sstevel@tonic-gate * miscellaneous changes, small cleanups etc.
780Sstevel@tonic-gate *
790Sstevel@tonic-gate * Revision 1.3 1989/04/01 14:18:09 oz
800Sstevel@tonic-gate * Change all references to a dfa: this is actually an nfa.
810Sstevel@tonic-gate *
820Sstevel@tonic-gate * Revision 1.2 88/08/28 15:36:04 oz
830Sstevel@tonic-gate * Use a complement bitmap to represent NCL.
840Sstevel@tonic-gate * This removes the need to have seperate
850Sstevel@tonic-gate * code in the pmatch case block - it is
860Sstevel@tonic-gate * just CCL code now.
870Sstevel@tonic-gate *
880Sstevel@tonic-gate * Use the actual CCL code in the CLO
890Sstevel@tonic-gate * section of pmatch. No need for a recursive
900Sstevel@tonic-gate * pmatch call.
910Sstevel@tonic-gate *
920Sstevel@tonic-gate * Use a bitmap table to set char bits in an
930Sstevel@tonic-gate * 8-bit chunk.
940Sstevel@tonic-gate *
950Sstevel@tonic-gate * Interfaces:
960Sstevel@tonic-gate * re_comp: compile a regular expression into a NFA.
970Sstevel@tonic-gate *
980Sstevel@tonic-gate * char *re_comp(s)
990Sstevel@tonic-gate * char *s;
1000Sstevel@tonic-gate *
1010Sstevel@tonic-gate * re_exec: execute the NFA to match a pattern.
1020Sstevel@tonic-gate *
1030Sstevel@tonic-gate * int re_exec(s)
1040Sstevel@tonic-gate * char *s;
1050Sstevel@tonic-gate *
1060Sstevel@tonic-gate * re_modw change re_exec's understanding of what a "word"
1070Sstevel@tonic-gate * looks like (for \< and \>) by adding into the
1080Sstevel@tonic-gate * hidden word-syntax table.
1090Sstevel@tonic-gate *
1100Sstevel@tonic-gate * void re_modw(s)
1110Sstevel@tonic-gate * char *s;
1120Sstevel@tonic-gate *
1130Sstevel@tonic-gate * re_subs: substitute the matched portions in a new string.
1140Sstevel@tonic-gate *
1150Sstevel@tonic-gate * int re_subs(src, dst)
1160Sstevel@tonic-gate * char *src;
1170Sstevel@tonic-gate * char *dst;
1180Sstevel@tonic-gate *
1190Sstevel@tonic-gate * re_fail: failure routine for re_exec.
1200Sstevel@tonic-gate *
1210Sstevel@tonic-gate * void re_fail(msg, op)
1220Sstevel@tonic-gate * char *msg;
1230Sstevel@tonic-gate * char op;
1240Sstevel@tonic-gate *
1250Sstevel@tonic-gate * Regular Expressions:
1260Sstevel@tonic-gate *
1270Sstevel@tonic-gate * [1] char matches itself, unless it is a special
1280Sstevel@tonic-gate * character (metachar): . \ [ ] * + ^ $
1290Sstevel@tonic-gate *
1300Sstevel@tonic-gate * [2] . matches any character.
1310Sstevel@tonic-gate *
1320Sstevel@tonic-gate * [3] \ matches the character following it, except
1330Sstevel@tonic-gate * when followed by a left or right round bracket,
1340Sstevel@tonic-gate * a digit 1 to 9 or a left or right angle bracket.
1350Sstevel@tonic-gate * (see [7], [8] and [9])
1360Sstevel@tonic-gate * It is used as an escape character for all
1370Sstevel@tonic-gate * other meta-characters, and itself. When used
1380Sstevel@tonic-gate * in a set ([4]), it is treated as an ordinary
1390Sstevel@tonic-gate * character.
1400Sstevel@tonic-gate *
1410Sstevel@tonic-gate * [4] [set] matches one of the characters in the set.
1420Sstevel@tonic-gate * If the first character in the set is "^",
1430Sstevel@tonic-gate * it matches a character NOT in the set, i.e.
1440Sstevel@tonic-gate * complements the set. A shorthand S-E is
1450Sstevel@tonic-gate * used to specify a set of characters S upto
1460Sstevel@tonic-gate * E, inclusive. The special characters "]" and
1470Sstevel@tonic-gate * "-" have no special meaning if they appear
1480Sstevel@tonic-gate * as the first chars in the set.
1490Sstevel@tonic-gate * examples: match:
1500Sstevel@tonic-gate *
1510Sstevel@tonic-gate * [a-z] any lowercase alpha
1520Sstevel@tonic-gate *
1530Sstevel@tonic-gate * [^]-] any char except ] and -
1540Sstevel@tonic-gate *
1550Sstevel@tonic-gate * [^A-Z] any char except uppercase
1560Sstevel@tonic-gate * alpha
1570Sstevel@tonic-gate *
1580Sstevel@tonic-gate * [a-zA-Z] any alpha
1590Sstevel@tonic-gate *
1600Sstevel@tonic-gate * [5] * any regular expression form [1] to [4], followed by
1610Sstevel@tonic-gate * closure char (*) matches zero or more matches of
1620Sstevel@tonic-gate * that form.
1630Sstevel@tonic-gate *
1640Sstevel@tonic-gate * [6] + same as [5], except it matches one or more.
1650Sstevel@tonic-gate *
1660Sstevel@tonic-gate * [7] a regular expression in the form [1] to [10], enclosed
1670Sstevel@tonic-gate * as \(form\) matches what form matches. The enclosure
1680Sstevel@tonic-gate * creates a set of tags, used for [8] and for
1690Sstevel@tonic-gate * pattern substution. The tagged forms are numbered
1700Sstevel@tonic-gate * starting from 1.
1710Sstevel@tonic-gate *
1720Sstevel@tonic-gate * [8] a \ followed by a digit 1 to 9 matches whatever a
1730Sstevel@tonic-gate * previously tagged regular expression ([7]) matched.
1740Sstevel@tonic-gate *
1750Sstevel@tonic-gate * [9] \< a regular expression starting with a \< construct
1760Sstevel@tonic-gate * \> and/or ending with a \> construct, restricts the
1770Sstevel@tonic-gate * pattern matching to the beginning of a word, and/or
1780Sstevel@tonic-gate * the end of a word. A word is defined to be a character
1790Sstevel@tonic-gate * string beginning and/or ending with the characters
1800Sstevel@tonic-gate * A-Z a-z 0-9 and _. It must also be preceded and/or
1810Sstevel@tonic-gate * followed by any character outside those mentioned.
1820Sstevel@tonic-gate *
1830Sstevel@tonic-gate * [10] a composite regular expression xy where x and y
1840Sstevel@tonic-gate * are in the form [1] to [10] matches the longest
1850Sstevel@tonic-gate * match of x followed by a match for y.
1860Sstevel@tonic-gate *
1870Sstevel@tonic-gate * [11] ^ a regular expression starting with a ^ character
1880Sstevel@tonic-gate * $ and/or ending with a $ character, restricts the
1890Sstevel@tonic-gate * pattern matching to the beginning of the line,
1900Sstevel@tonic-gate * or the end of line. [anchors] Elsewhere in the
1910Sstevel@tonic-gate * pattern, ^ and $ are treated as ordinary characters.
1920Sstevel@tonic-gate *
1930Sstevel@tonic-gate *
1940Sstevel@tonic-gate * Acknowledgements:
1950Sstevel@tonic-gate *
1960Sstevel@tonic-gate * HCR's Hugh Redelmeier has been most helpful in various
1970Sstevel@tonic-gate * stages of development. He convinced me to include BOW
1980Sstevel@tonic-gate * and EOW constructs, originally invented by Rob Pike at
1990Sstevel@tonic-gate * the University of Toronto.
2000Sstevel@tonic-gate *
2010Sstevel@tonic-gate * References:
2020Sstevel@tonic-gate * Software tools Kernighan & Plauger
2030Sstevel@tonic-gate * Software tools in Pascal Kernighan & Plauger
2040Sstevel@tonic-gate * Grep [rsx-11 C dist] David Conroy
2050Sstevel@tonic-gate * ed - text editor Un*x Programmer's Manual
2060Sstevel@tonic-gate * Advanced editing on Un*x B. W. Kernighan
2070Sstevel@tonic-gate * RegExp routines Henry Spencer
2080Sstevel@tonic-gate *
2090Sstevel@tonic-gate * Notes:
2100Sstevel@tonic-gate *
2110Sstevel@tonic-gate * This implementation uses a bit-set representation for character
2120Sstevel@tonic-gate * classes for speed and compactness. Each character is represented
2130Sstevel@tonic-gate * by one bit in a 128-bit block. Thus, CCL always takes a
2140Sstevel@tonic-gate * constant 16 bytes in the internal nfa, and re_exec does a single
2150Sstevel@tonic-gate * bit comparison to locate the character in the set.
2160Sstevel@tonic-gate *
2170Sstevel@tonic-gate * Examples:
2180Sstevel@tonic-gate *
2190Sstevel@tonic-gate * pattern: foo*.*
2200Sstevel@tonic-gate * compile: CHR f CHR o CLO CHR o END CLO ANY END END
2210Sstevel@tonic-gate * matches: fo foo fooo foobar fobar foxx ...
2220Sstevel@tonic-gate *
2230Sstevel@tonic-gate * pattern: fo[ob]a[rz]
2240Sstevel@tonic-gate * compile: CHR f CHR o CCL bitset CHR a CCL bitset END
2250Sstevel@tonic-gate * matches: fobar fooar fobaz fooaz
2260Sstevel@tonic-gate *
2270Sstevel@tonic-gate * pattern: foo\\+
2280Sstevel@tonic-gate * compile: CHR f CHR o CHR o CHR \ CLO CHR \ END END
2290Sstevel@tonic-gate * matches: foo\ foo\\ foo\\\ ...
2300Sstevel@tonic-gate *
2310Sstevel@tonic-gate * pattern: \(foo\)[1-3]\1 (same as foo[1-3]foo)
2320Sstevel@tonic-gate * compile: BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END
2330Sstevel@tonic-gate * matches: foo1foo foo2foo foo3foo
2340Sstevel@tonic-gate *
2350Sstevel@tonic-gate * pattern: \(fo.*\)-\1
2360Sstevel@tonic-gate * compile: BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END
2370Sstevel@tonic-gate * matches: foo-foo fo-fo fob-fob foobar-foobar ...
2380Sstevel@tonic-gate */
2390Sstevel@tonic-gate
2400Sstevel@tonic-gate #define MAXNFA 1024
2410Sstevel@tonic-gate #define MAXTAG 10
2420Sstevel@tonic-gate
2430Sstevel@tonic-gate #define OKP 1
2440Sstevel@tonic-gate #define NOP 0
2450Sstevel@tonic-gate
2460Sstevel@tonic-gate #define CHR 1
2470Sstevel@tonic-gate #define ANY 2
2480Sstevel@tonic-gate #define CCL 3
2490Sstevel@tonic-gate #define BOL 4
2500Sstevel@tonic-gate #define EOL 5
2510Sstevel@tonic-gate #define BOT 6
2520Sstevel@tonic-gate #define EOT 7
2530Sstevel@tonic-gate #define BOW 8
2540Sstevel@tonic-gate #define EOW 9
2550Sstevel@tonic-gate #define REF 10
2560Sstevel@tonic-gate #define CLO 11
2570Sstevel@tonic-gate
2580Sstevel@tonic-gate #define END 0
2590Sstevel@tonic-gate
2600Sstevel@tonic-gate /*
2610Sstevel@tonic-gate * The following defines are not meant to be changeable.
2620Sstevel@tonic-gate * They are for readability only.
2630Sstevel@tonic-gate */
2640Sstevel@tonic-gate #define MAXCHR 128
2650Sstevel@tonic-gate #define CHRBIT 8
2660Sstevel@tonic-gate #define BITBLK MAXCHR/CHRBIT
2670Sstevel@tonic-gate #define BLKIND 0170
2680Sstevel@tonic-gate #define BITIND 07
2690Sstevel@tonic-gate
2700Sstevel@tonic-gate #define ASCIIB 0177
2710Sstevel@tonic-gate
2720Sstevel@tonic-gate #if defined( DOS ) || defined( _WIN32 ) || defined(SUN)
2730Sstevel@tonic-gate typedef unsigned char CHAR;
2740Sstevel@tonic-gate #else /* DOS */
2750Sstevel@tonic-gate typedef /*unsigned*/ char CHAR;
2760Sstevel@tonic-gate #endif /* DOS */
2770Sstevel@tonic-gate
2780Sstevel@tonic-gate static int tagstk[MAXTAG]; /* subpat tag stack..*/
2790Sstevel@tonic-gate static CHAR nfa[MAXNFA]; /* automaton.. */
2800Sstevel@tonic-gate static int sta = NOP; /* status of lastpat */
2810Sstevel@tonic-gate
2820Sstevel@tonic-gate static CHAR bittab[BITBLK]; /* bit table for CCL */
2830Sstevel@tonic-gate /* pre-set bits... */
2840Sstevel@tonic-gate static CHAR bitarr[] = {1,2,4,8,16,32,64,128};
2850Sstevel@tonic-gate
2860Sstevel@tonic-gate #ifdef DEBUG
2870Sstevel@tonic-gate static void nfadump(CHAR *);
2880Sstevel@tonic-gate void symbolic(char *);
2890Sstevel@tonic-gate #endif
2900Sstevel@tonic-gate
2910Sstevel@tonic-gate static void
chset(CHAR c)2920Sstevel@tonic-gate chset(CHAR c)
2930Sstevel@tonic-gate {
2940Sstevel@tonic-gate bittab[((c) & BLKIND) >> 3] |= bitarr[(c) & BITIND];
2950Sstevel@tonic-gate }
2960Sstevel@tonic-gate
2970Sstevel@tonic-gate #define badpat(x) (*nfa = END, x)
2980Sstevel@tonic-gate #define store(x) *mp++ = x
2990Sstevel@tonic-gate
3000Sstevel@tonic-gate char *
re_comp(char * pat)3010Sstevel@tonic-gate re_comp( char *pat )
3020Sstevel@tonic-gate {
3030Sstevel@tonic-gate register char *p; /* pattern pointer */
3040Sstevel@tonic-gate register CHAR *mp=nfa; /* nfa pointer */
3050Sstevel@tonic-gate register CHAR *lp; /* saved pointer.. */
3060Sstevel@tonic-gate register CHAR *sp=nfa; /* another one.. */
3070Sstevel@tonic-gate
3080Sstevel@tonic-gate register int tagi = 0; /* tag stack index */
3090Sstevel@tonic-gate register int tagc = 1; /* actual tag count */
3100Sstevel@tonic-gate
3110Sstevel@tonic-gate register int n;
3120Sstevel@tonic-gate register CHAR mask; /* xor mask -CCL/NCL */
3130Sstevel@tonic-gate int c1, c2;
3140Sstevel@tonic-gate
3150Sstevel@tonic-gate if (!pat || !*pat)
3160Sstevel@tonic-gate if (sta)
3170Sstevel@tonic-gate return 0;
3180Sstevel@tonic-gate else
3190Sstevel@tonic-gate return badpat("No previous regular expression");
3200Sstevel@tonic-gate sta = NOP;
3210Sstevel@tonic-gate
3220Sstevel@tonic-gate for (p = pat; *p; p++) {
3230Sstevel@tonic-gate lp = mp;
3240Sstevel@tonic-gate switch(*p) {
3250Sstevel@tonic-gate
3260Sstevel@tonic-gate case '.': /* match any char.. */
3270Sstevel@tonic-gate store(ANY);
3280Sstevel@tonic-gate break;
3290Sstevel@tonic-gate
3300Sstevel@tonic-gate case '^': /* match beginning.. */
3310Sstevel@tonic-gate if (p == pat)
3320Sstevel@tonic-gate store(BOL);
3330Sstevel@tonic-gate else {
3340Sstevel@tonic-gate store(CHR);
3350Sstevel@tonic-gate store(*p);
3360Sstevel@tonic-gate }
3370Sstevel@tonic-gate break;
3380Sstevel@tonic-gate
3390Sstevel@tonic-gate case '$': /* match endofline.. */
3400Sstevel@tonic-gate if (!*(p+1))
3410Sstevel@tonic-gate store(EOL);
3420Sstevel@tonic-gate else {
3430Sstevel@tonic-gate store(CHR);
3440Sstevel@tonic-gate store(*p);
3450Sstevel@tonic-gate }
3460Sstevel@tonic-gate break;
3470Sstevel@tonic-gate
3480Sstevel@tonic-gate case '[': /* match char class..*/
3490Sstevel@tonic-gate store(CCL);
3500Sstevel@tonic-gate
3510Sstevel@tonic-gate if (*++p == '^') {
3520Sstevel@tonic-gate mask = 0377;
3530Sstevel@tonic-gate p++;
3540Sstevel@tonic-gate }
3550Sstevel@tonic-gate else
3560Sstevel@tonic-gate mask = 0;
3570Sstevel@tonic-gate
3580Sstevel@tonic-gate if (*p == '-') /* real dash */
3590Sstevel@tonic-gate chset(*p++);
3600Sstevel@tonic-gate if (*p == ']') /* real brac */
3610Sstevel@tonic-gate chset(*p++);
3620Sstevel@tonic-gate while (*p && *p != ']') {
3630Sstevel@tonic-gate if (*p == '-' && *(p+1) && *(p+1) != ']') {
3640Sstevel@tonic-gate p++;
3650Sstevel@tonic-gate c1 = *(p-2) + 1;
3660Sstevel@tonic-gate c2 = *p++;
3670Sstevel@tonic-gate while (c1 <= c2)
3680Sstevel@tonic-gate chset((CHAR)c1++);
3690Sstevel@tonic-gate }
3700Sstevel@tonic-gate #ifdef EXTEND
3710Sstevel@tonic-gate else if (*p == '\\' && *(p+1)) {
3720Sstevel@tonic-gate p++;
3730Sstevel@tonic-gate chset(*p++);
3740Sstevel@tonic-gate }
3750Sstevel@tonic-gate #endif
3760Sstevel@tonic-gate else
3770Sstevel@tonic-gate chset(*p++);
3780Sstevel@tonic-gate }
3790Sstevel@tonic-gate if (!*p)
3800Sstevel@tonic-gate return badpat("Missing ]");
3810Sstevel@tonic-gate
3820Sstevel@tonic-gate for (n = 0; n < BITBLK; bittab[n++] = (char) 0)
3830Sstevel@tonic-gate store(mask ^ bittab[n]);
3840Sstevel@tonic-gate
3850Sstevel@tonic-gate break;
3860Sstevel@tonic-gate
3870Sstevel@tonic-gate case '*': /* match 0 or more.. */
3880Sstevel@tonic-gate case '+': /* match 1 or more.. */
3890Sstevel@tonic-gate if (p == pat)
3900Sstevel@tonic-gate return badpat("Empty closure");
3910Sstevel@tonic-gate lp = sp; /* previous opcode */
3920Sstevel@tonic-gate if (*lp == CLO) /* equivalence.. */
3930Sstevel@tonic-gate break;
3940Sstevel@tonic-gate switch(*lp) {
3950Sstevel@tonic-gate
3960Sstevel@tonic-gate case BOL:
3970Sstevel@tonic-gate case BOT:
3980Sstevel@tonic-gate case EOT:
3990Sstevel@tonic-gate case BOW:
4000Sstevel@tonic-gate case EOW:
4010Sstevel@tonic-gate case REF:
4020Sstevel@tonic-gate return badpat("Illegal closure");
4030Sstevel@tonic-gate default:
4040Sstevel@tonic-gate break;
4050Sstevel@tonic-gate }
4060Sstevel@tonic-gate
4070Sstevel@tonic-gate if (*p == '+')
4080Sstevel@tonic-gate for (sp = mp; lp < sp; lp++)
4090Sstevel@tonic-gate store(*lp);
4100Sstevel@tonic-gate
4110Sstevel@tonic-gate store(END);
4120Sstevel@tonic-gate store(END);
4130Sstevel@tonic-gate sp = mp;
4140Sstevel@tonic-gate while (--mp > lp)
4150Sstevel@tonic-gate *mp = mp[-1];
4160Sstevel@tonic-gate store(CLO);
4170Sstevel@tonic-gate mp = sp;
4180Sstevel@tonic-gate break;
4190Sstevel@tonic-gate
4200Sstevel@tonic-gate case '\\': /* tags, backrefs .. */
4210Sstevel@tonic-gate switch(*++p) {
4220Sstevel@tonic-gate
4230Sstevel@tonic-gate case '(':
4240Sstevel@tonic-gate if (tagc < MAXTAG) {
4250Sstevel@tonic-gate tagstk[++tagi] = tagc;
4260Sstevel@tonic-gate store(BOT);
4270Sstevel@tonic-gate store(tagc++);
4280Sstevel@tonic-gate }
4290Sstevel@tonic-gate else
4300Sstevel@tonic-gate return badpat("Too many \\(\\) pairs");
4310Sstevel@tonic-gate break;
4320Sstevel@tonic-gate case ')':
4330Sstevel@tonic-gate if (*sp == BOT)
4340Sstevel@tonic-gate return badpat("Null pattern inside \\(\\)");
4350Sstevel@tonic-gate if (tagi > 0) {
4360Sstevel@tonic-gate store(EOT);
4370Sstevel@tonic-gate store(tagstk[tagi--]);
4380Sstevel@tonic-gate }
4390Sstevel@tonic-gate else
4400Sstevel@tonic-gate return badpat("Unmatched \\)");
4410Sstevel@tonic-gate break;
4420Sstevel@tonic-gate case '<':
4430Sstevel@tonic-gate store(BOW);
4440Sstevel@tonic-gate break;
4450Sstevel@tonic-gate case '>':
4460Sstevel@tonic-gate if (*sp == BOW)
4470Sstevel@tonic-gate return badpat("Null pattern inside \\<\\>");
4480Sstevel@tonic-gate store(EOW);
4490Sstevel@tonic-gate break;
4500Sstevel@tonic-gate case '1':
4510Sstevel@tonic-gate case '2':
4520Sstevel@tonic-gate case '3':
4530Sstevel@tonic-gate case '4':
4540Sstevel@tonic-gate case '5':
4550Sstevel@tonic-gate case '6':
4560Sstevel@tonic-gate case '7':
4570Sstevel@tonic-gate case '8':
4580Sstevel@tonic-gate case '9':
4590Sstevel@tonic-gate n = *p-'0';
4600Sstevel@tonic-gate if (tagi > 0 && tagstk[tagi] == n)
4610Sstevel@tonic-gate return badpat("Cyclical reference");
4620Sstevel@tonic-gate if (tagc > n) {
4630Sstevel@tonic-gate store(REF);
4640Sstevel@tonic-gate store(n);
4650Sstevel@tonic-gate }
4660Sstevel@tonic-gate else
4670Sstevel@tonic-gate return badpat("Undetermined reference");
4680Sstevel@tonic-gate break;
4690Sstevel@tonic-gate #ifdef EXTEND
4700Sstevel@tonic-gate case 'b':
4710Sstevel@tonic-gate store(CHR);
4720Sstevel@tonic-gate store('\b');
4730Sstevel@tonic-gate break;
4740Sstevel@tonic-gate case 'n':
4750Sstevel@tonic-gate store(CHR);
4760Sstevel@tonic-gate store('\n');
4770Sstevel@tonic-gate break;
4780Sstevel@tonic-gate case 'f':
4790Sstevel@tonic-gate store(CHR);
4800Sstevel@tonic-gate store('\f');
4810Sstevel@tonic-gate break;
4820Sstevel@tonic-gate case 'r':
4830Sstevel@tonic-gate store(CHR);
4840Sstevel@tonic-gate store('\r');
4850Sstevel@tonic-gate break;
4860Sstevel@tonic-gate case 't':
4870Sstevel@tonic-gate store(CHR);
4880Sstevel@tonic-gate store('\t');
4890Sstevel@tonic-gate break;
4900Sstevel@tonic-gate #endif
4910Sstevel@tonic-gate default:
4920Sstevel@tonic-gate store(CHR);
4930Sstevel@tonic-gate store(*p);
4940Sstevel@tonic-gate }
4950Sstevel@tonic-gate break;
4960Sstevel@tonic-gate
4970Sstevel@tonic-gate default : /* an ordinary char */
4980Sstevel@tonic-gate store(CHR);
4990Sstevel@tonic-gate store(*p);
5000Sstevel@tonic-gate break;
5010Sstevel@tonic-gate }
5020Sstevel@tonic-gate sp = lp;
5030Sstevel@tonic-gate }
5040Sstevel@tonic-gate if (tagi > 0)
5050Sstevel@tonic-gate return badpat("Unmatched \\(");
5060Sstevel@tonic-gate store(END);
5070Sstevel@tonic-gate sta = OKP;
5080Sstevel@tonic-gate return 0;
5090Sstevel@tonic-gate }
5100Sstevel@tonic-gate
5110Sstevel@tonic-gate
5120Sstevel@tonic-gate static char *bol;
5130Sstevel@tonic-gate char *bopat[MAXTAG];
5140Sstevel@tonic-gate char *eopat[MAXTAG];
5150Sstevel@tonic-gate #ifdef NEEDPROTOS
5160Sstevel@tonic-gate static char *pmatch( char *lp, CHAR *ap );
5170Sstevel@tonic-gate #else /* NEEDPROTOS */
5180Sstevel@tonic-gate static char *pmatch();
5190Sstevel@tonic-gate #endif /* NEEDPROTOS */
5200Sstevel@tonic-gate
5210Sstevel@tonic-gate /*
5220Sstevel@tonic-gate * re_exec:
5230Sstevel@tonic-gate * execute nfa to find a match.
5240Sstevel@tonic-gate *
5250Sstevel@tonic-gate * special cases: (nfa[0])
5260Sstevel@tonic-gate * BOL
5270Sstevel@tonic-gate * Match only once, starting from the
5280Sstevel@tonic-gate * beginning.
5290Sstevel@tonic-gate * CHR
5300Sstevel@tonic-gate * First locate the character without
5310Sstevel@tonic-gate * calling pmatch, and if found, call
5320Sstevel@tonic-gate * pmatch for the remaining string.
5330Sstevel@tonic-gate * END
5340Sstevel@tonic-gate * re_comp failed, poor luser did not
5350Sstevel@tonic-gate * check for it. Fail fast.
5360Sstevel@tonic-gate *
5370Sstevel@tonic-gate * If a match is found, bopat[0] and eopat[0] are set
5380Sstevel@tonic-gate * to the beginning and the end of the matched fragment,
5390Sstevel@tonic-gate * respectively.
5400Sstevel@tonic-gate *
5410Sstevel@tonic-gate */
5420Sstevel@tonic-gate
5430Sstevel@tonic-gate int
re_exec(char * lp)5440Sstevel@tonic-gate re_exec( char *lp )
5450Sstevel@tonic-gate {
5460Sstevel@tonic-gate register char c;
5470Sstevel@tonic-gate register char *ep = 0;
5480Sstevel@tonic-gate register CHAR *ap = nfa;
5490Sstevel@tonic-gate
5500Sstevel@tonic-gate bol = lp;
5510Sstevel@tonic-gate
5520Sstevel@tonic-gate bopat[0] = 0;
5530Sstevel@tonic-gate bopat[1] = 0;
5540Sstevel@tonic-gate bopat[2] = 0;
5550Sstevel@tonic-gate bopat[3] = 0;
5560Sstevel@tonic-gate bopat[4] = 0;
5570Sstevel@tonic-gate bopat[5] = 0;
5580Sstevel@tonic-gate bopat[6] = 0;
5590Sstevel@tonic-gate bopat[7] = 0;
5600Sstevel@tonic-gate bopat[8] = 0;
5610Sstevel@tonic-gate bopat[9] = 0;
5620Sstevel@tonic-gate
5630Sstevel@tonic-gate switch(*ap) {
5640Sstevel@tonic-gate
5650Sstevel@tonic-gate case BOL: /* anchored: match from BOL only */
5660Sstevel@tonic-gate ep = pmatch(lp,ap);
5670Sstevel@tonic-gate break;
5680Sstevel@tonic-gate case CHR: /* ordinary char: locate it fast */
5690Sstevel@tonic-gate c = *(ap+1);
5700Sstevel@tonic-gate while (*lp && *lp != c)
5710Sstevel@tonic-gate lp++;
5720Sstevel@tonic-gate if (!*lp) /* if EOS, fail, else fall thru. */
5730Sstevel@tonic-gate return 0;
5740Sstevel@tonic-gate default: /* regular matching all the way. */
5750Sstevel@tonic-gate do {
5760Sstevel@tonic-gate if ((ep = pmatch(lp,ap)))
5770Sstevel@tonic-gate break;
5780Sstevel@tonic-gate lp++;
5790Sstevel@tonic-gate } while (*lp);
5800Sstevel@tonic-gate
5810Sstevel@tonic-gate break;
5820Sstevel@tonic-gate case END: /* munged automaton. fail always */
5830Sstevel@tonic-gate return 0;
5840Sstevel@tonic-gate }
5850Sstevel@tonic-gate if (!ep)
5860Sstevel@tonic-gate return 0;
5870Sstevel@tonic-gate
5880Sstevel@tonic-gate bopat[0] = lp;
5890Sstevel@tonic-gate eopat[0] = ep;
5900Sstevel@tonic-gate return 1;
5910Sstevel@tonic-gate }
5920Sstevel@tonic-gate
5930Sstevel@tonic-gate /*
5940Sstevel@tonic-gate * pmatch: internal routine for the hard part
5950Sstevel@tonic-gate *
5960Sstevel@tonic-gate * This code is partly snarfed from an early grep written by
5970Sstevel@tonic-gate * David Conroy. The backref and tag stuff, and various other
5980Sstevel@tonic-gate * innovations are by oz.
5990Sstevel@tonic-gate *
6000Sstevel@tonic-gate * special case optimizations: (nfa[n], nfa[n+1])
6010Sstevel@tonic-gate * CLO ANY
6020Sstevel@tonic-gate * We KNOW .* will match everything upto the
6030Sstevel@tonic-gate * end of line. Thus, directly go to the end of
6040Sstevel@tonic-gate * line, without recursive pmatch calls. As in
6050Sstevel@tonic-gate * the other closure cases, the remaining pattern
6060Sstevel@tonic-gate * must be matched by moving backwards on the
6070Sstevel@tonic-gate * string recursively, to find a match for xy
6080Sstevel@tonic-gate * (x is ".*" and y is the remaining pattern)
6090Sstevel@tonic-gate * where the match satisfies the LONGEST match for
6100Sstevel@tonic-gate * x followed by a match for y.
6110Sstevel@tonic-gate * CLO CHR
6120Sstevel@tonic-gate * We can again scan the string forward for the
6130Sstevel@tonic-gate * single char and at the point of failure, we
6140Sstevel@tonic-gate * execute the remaining nfa recursively, same as
6150Sstevel@tonic-gate * above.
6160Sstevel@tonic-gate *
6170Sstevel@tonic-gate * At the end of a successful match, bopat[n] and eopat[n]
6180Sstevel@tonic-gate * are set to the beginning and end of subpatterns matched
6190Sstevel@tonic-gate * by tagged expressions (n = 1 to 9).
6200Sstevel@tonic-gate *
6210Sstevel@tonic-gate */
6220Sstevel@tonic-gate
6230Sstevel@tonic-gate #ifndef re_fail
6240Sstevel@tonic-gate extern void re_fail();
6250Sstevel@tonic-gate #endif /* re_fail */
6260Sstevel@tonic-gate
6270Sstevel@tonic-gate /*
6280Sstevel@tonic-gate * character classification table for word boundary operators BOW
6290Sstevel@tonic-gate * and EOW. the reason for not using ctype macros is that we can
6300Sstevel@tonic-gate * let the user add into our own table. see re_modw. This table
6310Sstevel@tonic-gate * is not in the bitset form, since we may wish to extend it in the
6320Sstevel@tonic-gate * future for other character classifications.
6330Sstevel@tonic-gate *
6340Sstevel@tonic-gate * TRUE for 0-9 A-Z a-z _
6350Sstevel@tonic-gate */
6360Sstevel@tonic-gate static char chrtyp[MAXCHR] = {
6370Sstevel@tonic-gate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6380Sstevel@tonic-gate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6390Sstevel@tonic-gate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6400Sstevel@tonic-gate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6410Sstevel@tonic-gate 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
6420Sstevel@tonic-gate 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
6430Sstevel@tonic-gate 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
6440Sstevel@tonic-gate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6450Sstevel@tonic-gate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6460Sstevel@tonic-gate 1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
6470Sstevel@tonic-gate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6480Sstevel@tonic-gate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6490Sstevel@tonic-gate 1, 1, 1, 0, 0, 0, 0, 0
6500Sstevel@tonic-gate };
6510Sstevel@tonic-gate
6520Sstevel@tonic-gate #define inascii(x) (0177&(x))
6530Sstevel@tonic-gate #define iswordc(x) chrtyp[inascii(x)]
6540Sstevel@tonic-gate #define isinset(x,y) ((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND])
6550Sstevel@tonic-gate
6560Sstevel@tonic-gate /*
6570Sstevel@tonic-gate * skip values for CLO XXX to skip past the closure
6580Sstevel@tonic-gate */
6590Sstevel@tonic-gate
6600Sstevel@tonic-gate #define ANYSKIP 2 /* [CLO] ANY END ... */
6610Sstevel@tonic-gate #define CHRSKIP 3 /* [CLO] CHR chr END ... */
6620Sstevel@tonic-gate #define CCLSKIP 18 /* [CLO] CCL 16bytes END ... */
6630Sstevel@tonic-gate
6640Sstevel@tonic-gate static char *
pmatch(char * lp,CHAR * ap)6650Sstevel@tonic-gate pmatch( char *lp, CHAR *ap)
6660Sstevel@tonic-gate {
6670Sstevel@tonic-gate register int op, c, n;
6680Sstevel@tonic-gate register char *e; /* extra pointer for CLO */
6690Sstevel@tonic-gate register char *bp; /* beginning of subpat.. */
6700Sstevel@tonic-gate register char *ep; /* ending of subpat.. */
6710Sstevel@tonic-gate char *are; /* to save the line ptr. */
6720Sstevel@tonic-gate
6730Sstevel@tonic-gate while ((op = *ap++) != END)
6740Sstevel@tonic-gate switch(op) {
6750Sstevel@tonic-gate
6760Sstevel@tonic-gate case CHR:
6770Sstevel@tonic-gate if (*lp++ != *ap++)
6780Sstevel@tonic-gate return 0;
6790Sstevel@tonic-gate break;
6800Sstevel@tonic-gate case ANY:
6810Sstevel@tonic-gate if (!*lp++)
6820Sstevel@tonic-gate return 0;
6830Sstevel@tonic-gate break;
6840Sstevel@tonic-gate case CCL:
6850Sstevel@tonic-gate c = *lp++;
6860Sstevel@tonic-gate if (!isinset(ap,c))
6870Sstevel@tonic-gate return 0;
6880Sstevel@tonic-gate ap += BITBLK;
6890Sstevel@tonic-gate break;
6900Sstevel@tonic-gate case BOL:
6910Sstevel@tonic-gate if (lp != bol)
6920Sstevel@tonic-gate return 0;
6930Sstevel@tonic-gate break;
6940Sstevel@tonic-gate case EOL:
6950Sstevel@tonic-gate if (*lp)
6960Sstevel@tonic-gate return 0;
6970Sstevel@tonic-gate break;
6980Sstevel@tonic-gate case BOT:
6990Sstevel@tonic-gate bopat[*ap++] = lp;
7000Sstevel@tonic-gate break;
7010Sstevel@tonic-gate case EOT:
7020Sstevel@tonic-gate eopat[*ap++] = lp;
7030Sstevel@tonic-gate break;
7040Sstevel@tonic-gate case BOW:
7050Sstevel@tonic-gate if (lp!=bol && iswordc(lp[-1]) || !iswordc(*lp))
7060Sstevel@tonic-gate return 0;
7070Sstevel@tonic-gate break;
7080Sstevel@tonic-gate case EOW:
7090Sstevel@tonic-gate if (lp==bol || !iswordc(lp[-1]) || iswordc(*lp))
7100Sstevel@tonic-gate return 0;
7110Sstevel@tonic-gate break;
7120Sstevel@tonic-gate case REF:
7130Sstevel@tonic-gate n = *ap++;
7140Sstevel@tonic-gate bp = bopat[n];
7150Sstevel@tonic-gate ep = eopat[n];
7160Sstevel@tonic-gate while (bp < ep)
7170Sstevel@tonic-gate if (*bp++ != *lp++)
7180Sstevel@tonic-gate return 0;
7190Sstevel@tonic-gate break;
7200Sstevel@tonic-gate case CLO:
7210Sstevel@tonic-gate are = lp;
7220Sstevel@tonic-gate switch(*ap) {
7230Sstevel@tonic-gate
7240Sstevel@tonic-gate case ANY:
7250Sstevel@tonic-gate while (*lp)
7260Sstevel@tonic-gate lp++;
7270Sstevel@tonic-gate n = ANYSKIP;
7280Sstevel@tonic-gate break;
7290Sstevel@tonic-gate case CHR:
7300Sstevel@tonic-gate c = *(ap+1);
7310Sstevel@tonic-gate while (*lp && c == *lp)
7320Sstevel@tonic-gate lp++;
7330Sstevel@tonic-gate n = CHRSKIP;
7340Sstevel@tonic-gate break;
7350Sstevel@tonic-gate case CCL:
7360Sstevel@tonic-gate while ((c = *lp) && isinset(ap+1,c))
7370Sstevel@tonic-gate lp++;
7380Sstevel@tonic-gate n = CCLSKIP;
7390Sstevel@tonic-gate break;
7400Sstevel@tonic-gate default:
7410Sstevel@tonic-gate re_fail("closure: bad nfa.", *ap);
7420Sstevel@tonic-gate return 0;
7430Sstevel@tonic-gate }
7440Sstevel@tonic-gate
7450Sstevel@tonic-gate ap += n;
7460Sstevel@tonic-gate
7470Sstevel@tonic-gate while (lp >= are) {
7480Sstevel@tonic-gate if (e = pmatch(lp, ap))
7490Sstevel@tonic-gate return e;
7500Sstevel@tonic-gate --lp;
7510Sstevel@tonic-gate }
7520Sstevel@tonic-gate return 0;
7530Sstevel@tonic-gate default:
7540Sstevel@tonic-gate re_fail("re_exec: bad nfa.", op);
7550Sstevel@tonic-gate return 0;
7560Sstevel@tonic-gate }
7570Sstevel@tonic-gate return lp;
7580Sstevel@tonic-gate }
7590Sstevel@tonic-gate
7600Sstevel@tonic-gate /*
7610Sstevel@tonic-gate * re_modw:
7620Sstevel@tonic-gate * add new characters into the word table to change re_exec's
7630Sstevel@tonic-gate * understanding of what a word should look like. Note that we
7640Sstevel@tonic-gate * only accept additions into the word definition.
7650Sstevel@tonic-gate *
7660Sstevel@tonic-gate * If the string parameter is 0 or null string, the table is
7670Sstevel@tonic-gate * reset back to the default containing A-Z a-z 0-9 _. [We use
7680Sstevel@tonic-gate * the compact bitset representation for the default table]
7690Sstevel@tonic-gate */
7700Sstevel@tonic-gate
7710Sstevel@tonic-gate static CHAR deftab[16] = {
7720Sstevel@tonic-gate 0, 0, 0, 0, 0, 0, 0377, 003, 0376, 0377, 0377, 0207,
7730Sstevel@tonic-gate 0376, 0377, 0377, 007
7740Sstevel@tonic-gate };
7750Sstevel@tonic-gate
7760Sstevel@tonic-gate void
re_modw(char * s)7770Sstevel@tonic-gate re_modw( char *s )
7780Sstevel@tonic-gate {
7790Sstevel@tonic-gate register int i;
7800Sstevel@tonic-gate
7810Sstevel@tonic-gate if (!s || !*s) {
7820Sstevel@tonic-gate for (i = 0; i < MAXCHR; i++)
7830Sstevel@tonic-gate if (!isinset(deftab,i))
7840Sstevel@tonic-gate iswordc(i) = 0;
7850Sstevel@tonic-gate }
7860Sstevel@tonic-gate else
7870Sstevel@tonic-gate while(*s)
7880Sstevel@tonic-gate iswordc(*s++) = 1;
7890Sstevel@tonic-gate }
7900Sstevel@tonic-gate
7910Sstevel@tonic-gate /*
7920Sstevel@tonic-gate * re_subs:
7930Sstevel@tonic-gate * substitute the matched portions of the src in dst.
7940Sstevel@tonic-gate *
7950Sstevel@tonic-gate * & substitute the entire matched pattern.
7960Sstevel@tonic-gate *
7970Sstevel@tonic-gate * \digit substitute a subpattern, with the given tag number.
7980Sstevel@tonic-gate * Tags are numbered from 1 to 9. If the particular
7990Sstevel@tonic-gate * tagged subpattern does not exist, null is substituted.
8000Sstevel@tonic-gate */
8010Sstevel@tonic-gate int
re_subs(char * src,char * dst)8020Sstevel@tonic-gate re_subs( char *src, char *dst)
8030Sstevel@tonic-gate {
8040Sstevel@tonic-gate register char c;
8050Sstevel@tonic-gate register int pin;
8060Sstevel@tonic-gate register char *bp;
8070Sstevel@tonic-gate register char *ep;
8080Sstevel@tonic-gate
8090Sstevel@tonic-gate if (!*src || !bopat[0])
8100Sstevel@tonic-gate return 0;
8110Sstevel@tonic-gate
8120Sstevel@tonic-gate while (c = *src++) {
8130Sstevel@tonic-gate switch(c) {
8140Sstevel@tonic-gate
8150Sstevel@tonic-gate case '&':
8160Sstevel@tonic-gate pin = 0;
8170Sstevel@tonic-gate break;
8180Sstevel@tonic-gate
8190Sstevel@tonic-gate case '\\':
8200Sstevel@tonic-gate c = *src++;
8210Sstevel@tonic-gate if (c >= '0' && c <= '9') {
8220Sstevel@tonic-gate pin = c - '0';
8230Sstevel@tonic-gate break;
8240Sstevel@tonic-gate }
8250Sstevel@tonic-gate
8260Sstevel@tonic-gate default:
8270Sstevel@tonic-gate *dst++ = c;
8280Sstevel@tonic-gate continue;
8290Sstevel@tonic-gate }
8300Sstevel@tonic-gate
8310Sstevel@tonic-gate if ((bp = bopat[pin]) && (ep = eopat[pin])) {
8320Sstevel@tonic-gate while (*bp && bp < ep)
8330Sstevel@tonic-gate *dst++ = *bp++;
8340Sstevel@tonic-gate if (bp < ep)
8350Sstevel@tonic-gate return 0;
8360Sstevel@tonic-gate }
8370Sstevel@tonic-gate }
8380Sstevel@tonic-gate *dst = (char) 0;
8390Sstevel@tonic-gate return 1;
8400Sstevel@tonic-gate }
8410Sstevel@tonic-gate
8420Sstevel@tonic-gate #ifdef DEBUG
8430Sstevel@tonic-gate /*
8440Sstevel@tonic-gate * symbolic - produce a symbolic dump of the nfa
8450Sstevel@tonic-gate */
8460Sstevel@tonic-gate void
symbolic(char * s)8470Sstevel@tonic-gate symbolic( char *s )
8480Sstevel@tonic-gate {
8490Sstevel@tonic-gate (void) printf("pattern: %s\n", s);
8500Sstevel@tonic-gate (void) printf("nfacode:\n");
8510Sstevel@tonic-gate nfadump(nfa);
8520Sstevel@tonic-gate }
8530Sstevel@tonic-gate
8540Sstevel@tonic-gate static void
nfadump(CHAR * ap)8550Sstevel@tonic-gate nfadump( CHAR *ap)
8560Sstevel@tonic-gate {
8570Sstevel@tonic-gate register int n;
8580Sstevel@tonic-gate
8590Sstevel@tonic-gate while (*ap != END)
8600Sstevel@tonic-gate switch(*ap++) {
8610Sstevel@tonic-gate case CLO:
8620Sstevel@tonic-gate (void) printf("CLOSURE");
8630Sstevel@tonic-gate nfadump(ap);
8640Sstevel@tonic-gate switch(*ap) {
8650Sstevel@tonic-gate case CHR:
8660Sstevel@tonic-gate n = CHRSKIP;
8670Sstevel@tonic-gate break;
8680Sstevel@tonic-gate case ANY:
8690Sstevel@tonic-gate n = ANYSKIP;
8700Sstevel@tonic-gate break;
8710Sstevel@tonic-gate case CCL:
8720Sstevel@tonic-gate n = CCLSKIP;
8730Sstevel@tonic-gate break;
8740Sstevel@tonic-gate }
8750Sstevel@tonic-gate ap += n;
8760Sstevel@tonic-gate break;
8770Sstevel@tonic-gate case CHR:
8780Sstevel@tonic-gate (void) printf("\tCHR %c\n",*ap++);
8790Sstevel@tonic-gate break;
8800Sstevel@tonic-gate case ANY:
8810Sstevel@tonic-gate (void) printf("\tANY .\n");
8820Sstevel@tonic-gate break;
8830Sstevel@tonic-gate case BOL:
8840Sstevel@tonic-gate (void) printf("\tBOL -\n");
8850Sstevel@tonic-gate break;
8860Sstevel@tonic-gate case EOL:
8870Sstevel@tonic-gate (void) printf("\tEOL -\n");
8880Sstevel@tonic-gate break;
8890Sstevel@tonic-gate case BOT:
8900Sstevel@tonic-gate (void) printf("BOT: %d\n",*ap++);
8910Sstevel@tonic-gate break;
8920Sstevel@tonic-gate case EOT:
8930Sstevel@tonic-gate (void) printf("EOT: %d\n",*ap++);
8940Sstevel@tonic-gate break;
8950Sstevel@tonic-gate case BOW:
8960Sstevel@tonic-gate (void) printf("BOW\n");
8970Sstevel@tonic-gate break;
8980Sstevel@tonic-gate case EOW:
8990Sstevel@tonic-gate (void) printf("EOW\n");
9000Sstevel@tonic-gate break;
9010Sstevel@tonic-gate case REF:
9020Sstevel@tonic-gate (void) printf("REF: %d\n",*ap++);
9030Sstevel@tonic-gate break;
9040Sstevel@tonic-gate case CCL:
9050Sstevel@tonic-gate (void) printf("\tCCL [");
9060Sstevel@tonic-gate for (n = 0; n < MAXCHR; n++)
9070Sstevel@tonic-gate if (isinset(ap,(CHAR)n)) {
9080Sstevel@tonic-gate if (n < ' ')
9090Sstevel@tonic-gate (void) printf("^%c", n ^ 0x040);
9100Sstevel@tonic-gate else
9110Sstevel@tonic-gate (void) printf("%c", n);
9120Sstevel@tonic-gate }
9130Sstevel@tonic-gate (void) printf("]\n");
9140Sstevel@tonic-gate ap += BITBLK;
9150Sstevel@tonic-gate break;
9160Sstevel@tonic-gate default:
9170Sstevel@tonic-gate (void) printf("bad nfa. opcode %o\n", ap[-1]);
9180Sstevel@tonic-gate exit(1);
9190Sstevel@tonic-gate break;
9200Sstevel@tonic-gate }
9210Sstevel@tonic-gate }
9220Sstevel@tonic-gate #endif
9230Sstevel@tonic-gate #endif /* MACOS or DOS or NEED_BSDREGEX */
924