1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  *
3*0Sstevel@tonic-gate  * Portions Copyright %G% Sun Microsystems, Inc. All Rights Reserved
4*0Sstevel@tonic-gate  *
5*0Sstevel@tonic-gate  */
6*0Sstevel@tonic-gate 
7*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
8*0Sstevel@tonic-gate 
9*0Sstevel@tonic-gate #include "portable.h"
10*0Sstevel@tonic-gate #include "stdio.h"
11*0Sstevel@tonic-gate #include "stdlib.h"
12*0Sstevel@tonic-gate 
13*0Sstevel@tonic-gate #if defined( MACOS ) || defined( DOS ) || defined( _WIN32 ) || defined( NEED_BSDREGEX )
14*0Sstevel@tonic-gate #include "regex.h"
15*0Sstevel@tonic-gate 
16*0Sstevel@tonic-gate /*
17*0Sstevel@tonic-gate  * regex - Regular expression pattern matching  and replacement
18*0Sstevel@tonic-gate  *
19*0Sstevel@tonic-gate  * By:  Ozan S. Yigit (oz)
20*0Sstevel@tonic-gate  *      Dept. of Computer Science
21*0Sstevel@tonic-gate  *      York University
22*0Sstevel@tonic-gate  *
23*0Sstevel@tonic-gate  * These routines are the PUBLIC DOMAIN equivalents of regex
24*0Sstevel@tonic-gate  * routines as found in 4.nBSD UN*X, with minor extensions.
25*0Sstevel@tonic-gate  *
26*0Sstevel@tonic-gate  * These routines are derived from various implementations found
27*0Sstevel@tonic-gate  * in software tools books, and Conroy's grep. They are NOT derived
28*0Sstevel@tonic-gate  * from licensed/restricted software.
29*0Sstevel@tonic-gate  * For more interesting/academic/complicated implementations,
30*0Sstevel@tonic-gate  * see Henry Spencer's regexp routines, or GNU Emacs pattern
31*0Sstevel@tonic-gate  * matching module.
32*0Sstevel@tonic-gate  *
33*0Sstevel@tonic-gate  * Modification history:
34*0Sstevel@tonic-gate  *
35*0Sstevel@tonic-gate  * $Log: regex.c,v $
36*0Sstevel@tonic-gate  * Revision 1.12  1996/04/25  16:20:59  mcs
37*0Sstevel@tonic-gate  * make re_exec() match "" with ".*" and similar patterns
38*0Sstevel@tonic-gate  * hopefully this change doesn't break anything else!
39*0Sstevel@tonic-gate  *
40*0Sstevel@tonic-gate  * Revision 1.11  1994/12/14  21:33:45  mcs
41*0Sstevel@tonic-gate  * use new NEED_BSDREGEX
42*0Sstevel@tonic-gate  * fix pmatch() prototype
43*0Sstevel@tonic-gate  *
44*0Sstevel@tonic-gate  * Revision 1.10  1994/12/12  18:16:39  mcs
45*0Sstevel@tonic-gate  * use on NetBSD
46*0Sstevel@tonic-gate  *
47*0Sstevel@tonic-gate  * Revision 1.9  1994/11/15  19:16:35  mcs
48*0Sstevel@tonic-gate  * add (CHAR) cast to make VisualC++ happy
49*0Sstevel@tonic-gate  *
50*0Sstevel@tonic-gate  * Revision 1.8  1994/11/08  21:14:32  mcs
51*0Sstevel@tonic-gate  * WIN32 changes
52*0Sstevel@tonic-gate  *
53*0Sstevel@tonic-gate  * Revision 1.7  1994/07/23  19:51:24  mcs
54*0Sstevel@tonic-gate  * use ANSI-style inline function parameters
55*0Sstevel@tonic-gate  *
56*0Sstevel@tonic-gate  * Revision 1.6  1993/10/18  01:52:32  tim
57*0Sstevel@tonic-gate  * include for VMS
58*0Sstevel@tonic-gate  *
59*0Sstevel@tonic-gate  * Revision 1.5  1993/09/28  21:37:54  mcs
60*0Sstevel@tonic-gate  * HP/UX needs the regex we include (not in its libc)
61*0Sstevel@tonic-gate  *
62*0Sstevel@tonic-gate  * Revision 1.4  1993/08/27  15:59:52  mcs
63*0Sstevel@tonic-gate  * use CHAR for deftab
64*0Sstevel@tonic-gate  *
65*0Sstevel@tonic-gate  * Revision 1.3  1993/08/27  15:49:47  mcs
66*0Sstevel@tonic-gate  * added missing 0 to octal constants
67*0Sstevel@tonic-gate  * use unsigned char for CHAR under DOS
68*0Sstevel@tonic-gate  *
69*0Sstevel@tonic-gate  * Revision 1.2  1993/08/27  14:57:48  mcs
70*0Sstevel@tonic-gate  * add proto. for pmatch
71*0Sstevel@tonic-gate  *
72*0Sstevel@tonic-gate  * Revision 1.1  1993/08/18  21:20:02  mcs
73*0Sstevel@tonic-gate  * Initial revision
74*0Sstevel@tonic-gate  *
75*0Sstevel@tonic-gate  * Revision 1.4  1991/10/17  03:56:42  oz
76*0Sstevel@tonic-gate  * miscellaneous changes, small cleanups etc.
77*0Sstevel@tonic-gate  *
78*0Sstevel@tonic-gate  * Revision 1.3  1989/04/01  14:18:09  oz
79*0Sstevel@tonic-gate  * Change all references to a dfa: this is actually an nfa.
80*0Sstevel@tonic-gate  *
81*0Sstevel@tonic-gate  * Revision 1.2  88/08/28  15:36:04  oz
82*0Sstevel@tonic-gate  * Use a complement bitmap to represent NCL.
83*0Sstevel@tonic-gate  * This removes the need to have seperate
84*0Sstevel@tonic-gate  * code in the pmatch case block - it is
85*0Sstevel@tonic-gate  * just CCL code now.
86*0Sstevel@tonic-gate  *
87*0Sstevel@tonic-gate  * Use the actual CCL code in the CLO
88*0Sstevel@tonic-gate  * section of pmatch. No need for a recursive
89*0Sstevel@tonic-gate  * pmatch call.
90*0Sstevel@tonic-gate  *
91*0Sstevel@tonic-gate  * Use a bitmap table to set char bits in an
92*0Sstevel@tonic-gate  * 8-bit chunk.
93*0Sstevel@tonic-gate  *
94*0Sstevel@tonic-gate  * Interfaces:
95*0Sstevel@tonic-gate  *      re_comp:        compile a regular expression into a NFA.
96*0Sstevel@tonic-gate  *
97*0Sstevel@tonic-gate  *			char *re_comp(s)
98*0Sstevel@tonic-gate  *			char *s;
99*0Sstevel@tonic-gate  *
100*0Sstevel@tonic-gate  *      re_exec:        execute the NFA to match a pattern.
101*0Sstevel@tonic-gate  *
102*0Sstevel@tonic-gate  *			int re_exec(s)
103*0Sstevel@tonic-gate  *			char *s;
104*0Sstevel@tonic-gate  *
105*0Sstevel@tonic-gate  *	re_modw		change re_exec's understanding of what a "word"
106*0Sstevel@tonic-gate  *			looks like (for \< and \>) by adding into the
107*0Sstevel@tonic-gate  *			hidden word-syntax table.
108*0Sstevel@tonic-gate  *
109*0Sstevel@tonic-gate  *			void re_modw(s)
110*0Sstevel@tonic-gate  *			char *s;
111*0Sstevel@tonic-gate  *
112*0Sstevel@tonic-gate  *      re_subs:	substitute the matched portions in a new string.
113*0Sstevel@tonic-gate  *
114*0Sstevel@tonic-gate  *			int re_subs(src, dst)
115*0Sstevel@tonic-gate  *			char *src;
116*0Sstevel@tonic-gate  *			char *dst;
117*0Sstevel@tonic-gate  *
118*0Sstevel@tonic-gate  *	re_fail:	failure routine for re_exec.
119*0Sstevel@tonic-gate  *
120*0Sstevel@tonic-gate  *			void re_fail(msg, op)
121*0Sstevel@tonic-gate  *			char *msg;
122*0Sstevel@tonic-gate  *			char op;
123*0Sstevel@tonic-gate  *
124*0Sstevel@tonic-gate  * Regular Expressions:
125*0Sstevel@tonic-gate  *
126*0Sstevel@tonic-gate  *      [1]     char    matches itself, unless it is a special
127*0Sstevel@tonic-gate  *                      character (metachar): . \ [ ] * + ^ $
128*0Sstevel@tonic-gate  *
129*0Sstevel@tonic-gate  *      [2]     .       matches any character.
130*0Sstevel@tonic-gate  *
131*0Sstevel@tonic-gate  *      [3]     \       matches the character following it, except
132*0Sstevel@tonic-gate  *			when followed by a left or right round bracket,
133*0Sstevel@tonic-gate  *			a digit 1 to 9 or a left or right angle bracket.
134*0Sstevel@tonic-gate  *			(see [7], [8] and [9])
135*0Sstevel@tonic-gate  *			It is used as an escape character for all
136*0Sstevel@tonic-gate  *			other meta-characters, and itself. When used
137*0Sstevel@tonic-gate  *			in a set ([4]), it is treated as an ordinary
138*0Sstevel@tonic-gate  *			character.
139*0Sstevel@tonic-gate  *
140*0Sstevel@tonic-gate  *      [4]     [set]   matches one of the characters in the set.
141*0Sstevel@tonic-gate  *                      If the first character in the set is "^",
142*0Sstevel@tonic-gate  *                      it matches a character NOT in the set, i.e.
143*0Sstevel@tonic-gate  *			complements the set. A shorthand S-E is
144*0Sstevel@tonic-gate  *			used to specify a set of characters S upto
145*0Sstevel@tonic-gate  *			E, inclusive. The special characters "]" and
146*0Sstevel@tonic-gate  *			"-" have no special meaning if they appear
147*0Sstevel@tonic-gate  *			as the first chars in the set.
148*0Sstevel@tonic-gate  *                      examples:        match:
149*0Sstevel@tonic-gate  *
150*0Sstevel@tonic-gate  *                              [a-z]    any lowercase alpha
151*0Sstevel@tonic-gate  *
152*0Sstevel@tonic-gate  *                              [^]-]    any char except ] and -
153*0Sstevel@tonic-gate  *
154*0Sstevel@tonic-gate  *                              [^A-Z]   any char except uppercase
155*0Sstevel@tonic-gate  *                                       alpha
156*0Sstevel@tonic-gate  *
157*0Sstevel@tonic-gate  *                              [a-zA-Z] any alpha
158*0Sstevel@tonic-gate  *
159*0Sstevel@tonic-gate  *      [5]     *       any regular expression form [1] to [4], followed by
160*0Sstevel@tonic-gate  *                      closure char (*) matches zero or more matches of
161*0Sstevel@tonic-gate  *                      that form.
162*0Sstevel@tonic-gate  *
163*0Sstevel@tonic-gate  *      [6]     +       same as [5], except it matches one or more.
164*0Sstevel@tonic-gate  *
165*0Sstevel@tonic-gate  *      [7]             a regular expression in the form [1] to [10], enclosed
166*0Sstevel@tonic-gate  *                      as \(form\) matches what form matches. The enclosure
167*0Sstevel@tonic-gate  *                      creates a set of tags, used for [8] and for
168*0Sstevel@tonic-gate  *                      pattern substution. The tagged forms are numbered
169*0Sstevel@tonic-gate  *			starting from 1.
170*0Sstevel@tonic-gate  *
171*0Sstevel@tonic-gate  *      [8]             a \ followed by a digit 1 to 9 matches whatever a
172*0Sstevel@tonic-gate  *                      previously tagged regular expression ([7]) matched.
173*0Sstevel@tonic-gate  *
174*0Sstevel@tonic-gate  *	[9]	\<	a regular expression starting with a \< construct
175*0Sstevel@tonic-gate  *		\>	and/or ending with a \> construct, restricts the
176*0Sstevel@tonic-gate  *			pattern matching to the beginning of a word, and/or
177*0Sstevel@tonic-gate  *			the end of a word. A word is defined to be a character
178*0Sstevel@tonic-gate  *			string beginning and/or ending with the characters
179*0Sstevel@tonic-gate  *			A-Z a-z 0-9 and _. It must also be preceded and/or
180*0Sstevel@tonic-gate  *			followed by any character outside those mentioned.
181*0Sstevel@tonic-gate  *
182*0Sstevel@tonic-gate  *      [10]            a composite regular expression xy where x and y
183*0Sstevel@tonic-gate  *                      are in the form [1] to [10] matches the longest
184*0Sstevel@tonic-gate  *                      match of x followed by a match for y.
185*0Sstevel@tonic-gate  *
186*0Sstevel@tonic-gate  *      [11]	^	a regular expression starting with a ^ character
187*0Sstevel@tonic-gate  *		$	and/or ending with a $ character, restricts the
188*0Sstevel@tonic-gate  *                      pattern matching to the beginning of the line,
189*0Sstevel@tonic-gate  *                      or the end of line. [anchors] Elsewhere in the
190*0Sstevel@tonic-gate  *			pattern, ^ and $ are treated as ordinary characters.
191*0Sstevel@tonic-gate  *
192*0Sstevel@tonic-gate  *
193*0Sstevel@tonic-gate  * Acknowledgements:
194*0Sstevel@tonic-gate  *
195*0Sstevel@tonic-gate  *	HCR's Hugh Redelmeier has been most helpful in various
196*0Sstevel@tonic-gate  *	stages of development. He convinced me to include BOW
197*0Sstevel@tonic-gate  *	and EOW constructs, originally invented by Rob Pike at
198*0Sstevel@tonic-gate  *	the University of Toronto.
199*0Sstevel@tonic-gate  *
200*0Sstevel@tonic-gate  * References:
201*0Sstevel@tonic-gate  *              Software tools			Kernighan & Plauger
202*0Sstevel@tonic-gate  *              Software tools in Pascal        Kernighan & Plauger
203*0Sstevel@tonic-gate  *              Grep [rsx-11 C dist]            David Conroy
204*0Sstevel@tonic-gate  *		ed - text editor		Un*x Programmer's Manual
205*0Sstevel@tonic-gate  *		Advanced editing on Un*x	B. W. Kernighan
206*0Sstevel@tonic-gate  *		RegExp routines			Henry Spencer
207*0Sstevel@tonic-gate  *
208*0Sstevel@tonic-gate  * Notes:
209*0Sstevel@tonic-gate  *
210*0Sstevel@tonic-gate  *	This implementation uses a bit-set representation for character
211*0Sstevel@tonic-gate  *	classes for speed and compactness. Each character is represented
212*0Sstevel@tonic-gate  *	by one bit in a 128-bit block. Thus, CCL always takes a
213*0Sstevel@tonic-gate  *	constant 16 bytes in the internal nfa, and re_exec does a single
214*0Sstevel@tonic-gate  *	bit comparison to locate the character in the set.
215*0Sstevel@tonic-gate  *
216*0Sstevel@tonic-gate  * Examples:
217*0Sstevel@tonic-gate  *
218*0Sstevel@tonic-gate  *	pattern:	foo*.*
219*0Sstevel@tonic-gate  *	compile:	CHR f CHR o CLO CHR o END CLO ANY END END
220*0Sstevel@tonic-gate  *	matches:	fo foo fooo foobar fobar foxx ...
221*0Sstevel@tonic-gate  *
222*0Sstevel@tonic-gate  *	pattern:	fo[ob]a[rz]
223*0Sstevel@tonic-gate  *	compile:	CHR f CHR o CCL bitset CHR a CCL bitset END
224*0Sstevel@tonic-gate  *	matches:	fobar fooar fobaz fooaz
225*0Sstevel@tonic-gate  *
226*0Sstevel@tonic-gate  *	pattern:	foo\\+
227*0Sstevel@tonic-gate  *	compile:	CHR f CHR o CHR o CHR \ CLO CHR \ END END
228*0Sstevel@tonic-gate  *	matches:	foo\ foo\\ foo\\\  ...
229*0Sstevel@tonic-gate  *
230*0Sstevel@tonic-gate  *	pattern:	\(foo\)[1-3]\1	(same as foo[1-3]foo)
231*0Sstevel@tonic-gate  *	compile:	BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END
232*0Sstevel@tonic-gate  *	matches:	foo1foo foo2foo foo3foo
233*0Sstevel@tonic-gate  *
234*0Sstevel@tonic-gate  *	pattern:	\(fo.*\)-\1
235*0Sstevel@tonic-gate  *	compile:	BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END
236*0Sstevel@tonic-gate  *	matches:	foo-foo fo-fo fob-fob foobar-foobar ...
237*0Sstevel@tonic-gate  */
238*0Sstevel@tonic-gate 
239*0Sstevel@tonic-gate #define MAXNFA  1024
240*0Sstevel@tonic-gate #define MAXTAG  10
241*0Sstevel@tonic-gate 
242*0Sstevel@tonic-gate #define OKP     1
243*0Sstevel@tonic-gate #define NOP     0
244*0Sstevel@tonic-gate 
245*0Sstevel@tonic-gate #define CHR     1
246*0Sstevel@tonic-gate #define ANY     2
247*0Sstevel@tonic-gate #define CCL     3
248*0Sstevel@tonic-gate #define BOL     4
249*0Sstevel@tonic-gate #define EOL     5
250*0Sstevel@tonic-gate #define BOT     6
251*0Sstevel@tonic-gate #define EOT     7
252*0Sstevel@tonic-gate #define BOW	8
253*0Sstevel@tonic-gate #define EOW	9
254*0Sstevel@tonic-gate #define REF     10
255*0Sstevel@tonic-gate #define CLO     11
256*0Sstevel@tonic-gate 
257*0Sstevel@tonic-gate #define END     0
258*0Sstevel@tonic-gate 
259*0Sstevel@tonic-gate /*
260*0Sstevel@tonic-gate  * The following defines are not meant to be changeable.
261*0Sstevel@tonic-gate  * They are for readability only.
262*0Sstevel@tonic-gate  */
263*0Sstevel@tonic-gate #define MAXCHR	128
264*0Sstevel@tonic-gate #define CHRBIT	8
265*0Sstevel@tonic-gate #define BITBLK	MAXCHR/CHRBIT
266*0Sstevel@tonic-gate #define BLKIND	0170
267*0Sstevel@tonic-gate #define BITIND	07
268*0Sstevel@tonic-gate 
269*0Sstevel@tonic-gate #define ASCIIB	0177
270*0Sstevel@tonic-gate 
271*0Sstevel@tonic-gate #if defined( DOS ) || defined( _WIN32 ) || defined(SUN)
272*0Sstevel@tonic-gate typedef unsigned char CHAR;
273*0Sstevel@tonic-gate #else /* DOS */
274*0Sstevel@tonic-gate typedef /*unsigned*/ char CHAR;
275*0Sstevel@tonic-gate #endif /* DOS */
276*0Sstevel@tonic-gate 
277*0Sstevel@tonic-gate static int  tagstk[MAXTAG];             /* subpat tag stack..*/
278*0Sstevel@tonic-gate static CHAR nfa[MAXNFA];		/* automaton..       */
279*0Sstevel@tonic-gate static int  sta = NOP;               	/* status of lastpat */
280*0Sstevel@tonic-gate 
281*0Sstevel@tonic-gate static CHAR bittab[BITBLK];		/* bit table for CCL */
282*0Sstevel@tonic-gate 					/* pre-set bits...   */
283*0Sstevel@tonic-gate static CHAR bitarr[] = {1,2,4,8,16,32,64,128};
284*0Sstevel@tonic-gate 
285*0Sstevel@tonic-gate #ifdef DEBUG
286*0Sstevel@tonic-gate static void nfadump(CHAR *);
287*0Sstevel@tonic-gate void symbolic(char *);
288*0Sstevel@tonic-gate #endif
289*0Sstevel@tonic-gate 
290*0Sstevel@tonic-gate static void
291*0Sstevel@tonic-gate chset(CHAR c)
292*0Sstevel@tonic-gate {
293*0Sstevel@tonic-gate 	bittab[((c) & BLKIND) >> 3] |= bitarr[(c) & BITIND];
294*0Sstevel@tonic-gate }
295*0Sstevel@tonic-gate 
296*0Sstevel@tonic-gate #define badpat(x)	(*nfa = END, x)
297*0Sstevel@tonic-gate #define store(x)	*mp++ = x
298*0Sstevel@tonic-gate 
299*0Sstevel@tonic-gate char *
300*0Sstevel@tonic-gate re_comp( char *pat )
301*0Sstevel@tonic-gate {
302*0Sstevel@tonic-gate 	register char *p;               /* pattern pointer   */
303*0Sstevel@tonic-gate 	register CHAR *mp=nfa;          /* nfa pointer       */
304*0Sstevel@tonic-gate 	register CHAR *lp;              /* saved pointer..   */
305*0Sstevel@tonic-gate 	register CHAR *sp=nfa;          /* another one..     */
306*0Sstevel@tonic-gate 
307*0Sstevel@tonic-gate 	register int tagi = 0;          /* tag stack index   */
308*0Sstevel@tonic-gate 	register int tagc = 1;          /* actual tag count  */
309*0Sstevel@tonic-gate 
310*0Sstevel@tonic-gate 	register int n;
311*0Sstevel@tonic-gate 	register CHAR mask;		/* xor mask -CCL/NCL */
312*0Sstevel@tonic-gate 	int c1, c2;
313*0Sstevel@tonic-gate 
314*0Sstevel@tonic-gate 	if (!pat || !*pat)
315*0Sstevel@tonic-gate 		if (sta)
316*0Sstevel@tonic-gate 			return 0;
317*0Sstevel@tonic-gate 		else
318*0Sstevel@tonic-gate 			return badpat("No previous regular expression");
319*0Sstevel@tonic-gate 	sta = NOP;
320*0Sstevel@tonic-gate 
321*0Sstevel@tonic-gate 	for (p = pat; *p; p++) {
322*0Sstevel@tonic-gate 		lp = mp;
323*0Sstevel@tonic-gate 		switch(*p) {
324*0Sstevel@tonic-gate 
325*0Sstevel@tonic-gate 		case '.':               /* match any char..  */
326*0Sstevel@tonic-gate 			store(ANY);
327*0Sstevel@tonic-gate 			break;
328*0Sstevel@tonic-gate 
329*0Sstevel@tonic-gate 		case '^':               /* match beginning.. */
330*0Sstevel@tonic-gate 			if (p == pat)
331*0Sstevel@tonic-gate 				store(BOL);
332*0Sstevel@tonic-gate 			else {
333*0Sstevel@tonic-gate 				store(CHR);
334*0Sstevel@tonic-gate 				store(*p);
335*0Sstevel@tonic-gate 			}
336*0Sstevel@tonic-gate 			break;
337*0Sstevel@tonic-gate 
338*0Sstevel@tonic-gate 		case '$':               /* match endofline.. */
339*0Sstevel@tonic-gate 			if (!*(p+1))
340*0Sstevel@tonic-gate 				store(EOL);
341*0Sstevel@tonic-gate 			else {
342*0Sstevel@tonic-gate 				store(CHR);
343*0Sstevel@tonic-gate 				store(*p);
344*0Sstevel@tonic-gate 			}
345*0Sstevel@tonic-gate 			break;
346*0Sstevel@tonic-gate 
347*0Sstevel@tonic-gate 		case '[':               /* match char class..*/
348*0Sstevel@tonic-gate 			store(CCL);
349*0Sstevel@tonic-gate 
350*0Sstevel@tonic-gate 			if (*++p == '^') {
351*0Sstevel@tonic-gate 				mask = 0377;
352*0Sstevel@tonic-gate 				p++;
353*0Sstevel@tonic-gate 			}
354*0Sstevel@tonic-gate 			else
355*0Sstevel@tonic-gate 				mask = 0;
356*0Sstevel@tonic-gate 
357*0Sstevel@tonic-gate 			if (*p == '-')		/* real dash */
358*0Sstevel@tonic-gate 				chset(*p++);
359*0Sstevel@tonic-gate 			if (*p == ']')		/* real brac */
360*0Sstevel@tonic-gate 				chset(*p++);
361*0Sstevel@tonic-gate 			while (*p && *p != ']') {
362*0Sstevel@tonic-gate 				if (*p == '-' && *(p+1) && *(p+1) != ']') {
363*0Sstevel@tonic-gate 					p++;
364*0Sstevel@tonic-gate 					c1 = *(p-2) + 1;
365*0Sstevel@tonic-gate 					c2 = *p++;
366*0Sstevel@tonic-gate 					while (c1 <= c2)
367*0Sstevel@tonic-gate 						chset((CHAR)c1++);
368*0Sstevel@tonic-gate 				}
369*0Sstevel@tonic-gate #ifdef EXTEND
370*0Sstevel@tonic-gate 				else if (*p == '\\' && *(p+1)) {
371*0Sstevel@tonic-gate 					p++;
372*0Sstevel@tonic-gate 					chset(*p++);
373*0Sstevel@tonic-gate 				}
374*0Sstevel@tonic-gate #endif
375*0Sstevel@tonic-gate 				else
376*0Sstevel@tonic-gate 					chset(*p++);
377*0Sstevel@tonic-gate 			}
378*0Sstevel@tonic-gate 			if (!*p)
379*0Sstevel@tonic-gate 				return badpat("Missing ]");
380*0Sstevel@tonic-gate 
381*0Sstevel@tonic-gate 			for (n = 0; n < BITBLK; bittab[n++] = (char) 0)
382*0Sstevel@tonic-gate 				store(mask ^ bittab[n]);
383*0Sstevel@tonic-gate 
384*0Sstevel@tonic-gate 			break;
385*0Sstevel@tonic-gate 
386*0Sstevel@tonic-gate 		case '*':               /* match 0 or more.. */
387*0Sstevel@tonic-gate 		case '+':               /* match 1 or more.. */
388*0Sstevel@tonic-gate 			if (p == pat)
389*0Sstevel@tonic-gate 				return badpat("Empty closure");
390*0Sstevel@tonic-gate 			lp = sp;		/* previous opcode */
391*0Sstevel@tonic-gate 			if (*lp == CLO)		/* equivalence..   */
392*0Sstevel@tonic-gate 				break;
393*0Sstevel@tonic-gate 			switch(*lp) {
394*0Sstevel@tonic-gate 
395*0Sstevel@tonic-gate 			case BOL:
396*0Sstevel@tonic-gate 			case BOT:
397*0Sstevel@tonic-gate 			case EOT:
398*0Sstevel@tonic-gate 			case BOW:
399*0Sstevel@tonic-gate 			case EOW:
400*0Sstevel@tonic-gate 			case REF:
401*0Sstevel@tonic-gate 				return badpat("Illegal closure");
402*0Sstevel@tonic-gate 			default:
403*0Sstevel@tonic-gate 				break;
404*0Sstevel@tonic-gate 			}
405*0Sstevel@tonic-gate 
406*0Sstevel@tonic-gate 			if (*p == '+')
407*0Sstevel@tonic-gate 				for (sp = mp; lp < sp; lp++)
408*0Sstevel@tonic-gate 					store(*lp);
409*0Sstevel@tonic-gate 
410*0Sstevel@tonic-gate 			store(END);
411*0Sstevel@tonic-gate 			store(END);
412*0Sstevel@tonic-gate 			sp = mp;
413*0Sstevel@tonic-gate 			while (--mp > lp)
414*0Sstevel@tonic-gate 				*mp = mp[-1];
415*0Sstevel@tonic-gate 			store(CLO);
416*0Sstevel@tonic-gate 			mp = sp;
417*0Sstevel@tonic-gate 			break;
418*0Sstevel@tonic-gate 
419*0Sstevel@tonic-gate 		case '\\':              /* tags, backrefs .. */
420*0Sstevel@tonic-gate 			switch(*++p) {
421*0Sstevel@tonic-gate 
422*0Sstevel@tonic-gate 			case '(':
423*0Sstevel@tonic-gate 				if (tagc < MAXTAG) {
424*0Sstevel@tonic-gate 					tagstk[++tagi] = tagc;
425*0Sstevel@tonic-gate 					store(BOT);
426*0Sstevel@tonic-gate 					store(tagc++);
427*0Sstevel@tonic-gate 				}
428*0Sstevel@tonic-gate 				else
429*0Sstevel@tonic-gate 					return badpat("Too many \\(\\) pairs");
430*0Sstevel@tonic-gate 				break;
431*0Sstevel@tonic-gate 			case ')':
432*0Sstevel@tonic-gate 				if (*sp == BOT)
433*0Sstevel@tonic-gate 					return badpat("Null pattern inside \\(\\)");
434*0Sstevel@tonic-gate 				if (tagi > 0) {
435*0Sstevel@tonic-gate 					store(EOT);
436*0Sstevel@tonic-gate 					store(tagstk[tagi--]);
437*0Sstevel@tonic-gate 				}
438*0Sstevel@tonic-gate 				else
439*0Sstevel@tonic-gate 					return badpat("Unmatched \\)");
440*0Sstevel@tonic-gate 				break;
441*0Sstevel@tonic-gate 			case '<':
442*0Sstevel@tonic-gate 				store(BOW);
443*0Sstevel@tonic-gate 				break;
444*0Sstevel@tonic-gate 			case '>':
445*0Sstevel@tonic-gate 				if (*sp == BOW)
446*0Sstevel@tonic-gate 					return badpat("Null pattern inside \\<\\>");
447*0Sstevel@tonic-gate 				store(EOW);
448*0Sstevel@tonic-gate 				break;
449*0Sstevel@tonic-gate 			case '1':
450*0Sstevel@tonic-gate 			case '2':
451*0Sstevel@tonic-gate 			case '3':
452*0Sstevel@tonic-gate 			case '4':
453*0Sstevel@tonic-gate 			case '5':
454*0Sstevel@tonic-gate 			case '6':
455*0Sstevel@tonic-gate 			case '7':
456*0Sstevel@tonic-gate 			case '8':
457*0Sstevel@tonic-gate 			case '9':
458*0Sstevel@tonic-gate 				n = *p-'0';
459*0Sstevel@tonic-gate 				if (tagi > 0 && tagstk[tagi] == n)
460*0Sstevel@tonic-gate 					return badpat("Cyclical reference");
461*0Sstevel@tonic-gate 				if (tagc > n) {
462*0Sstevel@tonic-gate 					store(REF);
463*0Sstevel@tonic-gate 					store(n);
464*0Sstevel@tonic-gate 				}
465*0Sstevel@tonic-gate 				else
466*0Sstevel@tonic-gate 					return badpat("Undetermined reference");
467*0Sstevel@tonic-gate 				break;
468*0Sstevel@tonic-gate #ifdef EXTEND
469*0Sstevel@tonic-gate 			case 'b':
470*0Sstevel@tonic-gate 				store(CHR);
471*0Sstevel@tonic-gate 				store('\b');
472*0Sstevel@tonic-gate 				break;
473*0Sstevel@tonic-gate 			case 'n':
474*0Sstevel@tonic-gate 				store(CHR);
475*0Sstevel@tonic-gate 				store('\n');
476*0Sstevel@tonic-gate 				break;
477*0Sstevel@tonic-gate 			case 'f':
478*0Sstevel@tonic-gate 				store(CHR);
479*0Sstevel@tonic-gate 				store('\f');
480*0Sstevel@tonic-gate 				break;
481*0Sstevel@tonic-gate 			case 'r':
482*0Sstevel@tonic-gate 				store(CHR);
483*0Sstevel@tonic-gate 				store('\r');
484*0Sstevel@tonic-gate 				break;
485*0Sstevel@tonic-gate 			case 't':
486*0Sstevel@tonic-gate 				store(CHR);
487*0Sstevel@tonic-gate 				store('\t');
488*0Sstevel@tonic-gate 				break;
489*0Sstevel@tonic-gate #endif
490*0Sstevel@tonic-gate 			default:
491*0Sstevel@tonic-gate 				store(CHR);
492*0Sstevel@tonic-gate 				store(*p);
493*0Sstevel@tonic-gate 			}
494*0Sstevel@tonic-gate 			break;
495*0Sstevel@tonic-gate 
496*0Sstevel@tonic-gate 		default :               /* an ordinary char  */
497*0Sstevel@tonic-gate 			store(CHR);
498*0Sstevel@tonic-gate 			store(*p);
499*0Sstevel@tonic-gate 			break;
500*0Sstevel@tonic-gate 		}
501*0Sstevel@tonic-gate 		sp = lp;
502*0Sstevel@tonic-gate 	}
503*0Sstevel@tonic-gate 	if (tagi > 0)
504*0Sstevel@tonic-gate 		return badpat("Unmatched \\(");
505*0Sstevel@tonic-gate 	store(END);
506*0Sstevel@tonic-gate 	sta = OKP;
507*0Sstevel@tonic-gate 	return 0;
508*0Sstevel@tonic-gate }
509*0Sstevel@tonic-gate 
510*0Sstevel@tonic-gate 
511*0Sstevel@tonic-gate static char *bol;
512*0Sstevel@tonic-gate char *bopat[MAXTAG];
513*0Sstevel@tonic-gate char *eopat[MAXTAG];
514*0Sstevel@tonic-gate #ifdef NEEDPROTOS
515*0Sstevel@tonic-gate static char *pmatch( char *lp, CHAR *ap );
516*0Sstevel@tonic-gate #else /* NEEDPROTOS */
517*0Sstevel@tonic-gate static char *pmatch();
518*0Sstevel@tonic-gate #endif /* NEEDPROTOS */
519*0Sstevel@tonic-gate 
520*0Sstevel@tonic-gate /*
521*0Sstevel@tonic-gate  * re_exec:
522*0Sstevel@tonic-gate  * 	execute nfa to find a match.
523*0Sstevel@tonic-gate  *
524*0Sstevel@tonic-gate  *	special cases: (nfa[0])
525*0Sstevel@tonic-gate  *		BOL
526*0Sstevel@tonic-gate  *			Match only once, starting from the
527*0Sstevel@tonic-gate  *			beginning.
528*0Sstevel@tonic-gate  *		CHR
529*0Sstevel@tonic-gate  *			First locate the character without
530*0Sstevel@tonic-gate  *			calling pmatch, and if found, call
531*0Sstevel@tonic-gate  *			pmatch for the remaining string.
532*0Sstevel@tonic-gate  *		END
533*0Sstevel@tonic-gate  *			re_comp failed, poor luser did not
534*0Sstevel@tonic-gate  *			check for it. Fail fast.
535*0Sstevel@tonic-gate  *
536*0Sstevel@tonic-gate  *	If a match is found, bopat[0] and eopat[0] are set
537*0Sstevel@tonic-gate  *	to the beginning and the end of the matched fragment,
538*0Sstevel@tonic-gate  *	respectively.
539*0Sstevel@tonic-gate  *
540*0Sstevel@tonic-gate  */
541*0Sstevel@tonic-gate 
542*0Sstevel@tonic-gate int
543*0Sstevel@tonic-gate re_exec( char *lp )
544*0Sstevel@tonic-gate {
545*0Sstevel@tonic-gate 	register char c;
546*0Sstevel@tonic-gate 	register char *ep = 0;
547*0Sstevel@tonic-gate 	register CHAR *ap = nfa;
548*0Sstevel@tonic-gate 
549*0Sstevel@tonic-gate 	bol = lp;
550*0Sstevel@tonic-gate 
551*0Sstevel@tonic-gate 	bopat[0] = 0;
552*0Sstevel@tonic-gate 	bopat[1] = 0;
553*0Sstevel@tonic-gate 	bopat[2] = 0;
554*0Sstevel@tonic-gate 	bopat[3] = 0;
555*0Sstevel@tonic-gate 	bopat[4] = 0;
556*0Sstevel@tonic-gate 	bopat[5] = 0;
557*0Sstevel@tonic-gate 	bopat[6] = 0;
558*0Sstevel@tonic-gate 	bopat[7] = 0;
559*0Sstevel@tonic-gate 	bopat[8] = 0;
560*0Sstevel@tonic-gate 	bopat[9] = 0;
561*0Sstevel@tonic-gate 
562*0Sstevel@tonic-gate 	switch(*ap) {
563*0Sstevel@tonic-gate 
564*0Sstevel@tonic-gate 	case BOL:			/* anchored: match from BOL only */
565*0Sstevel@tonic-gate 		ep = pmatch(lp,ap);
566*0Sstevel@tonic-gate 		break;
567*0Sstevel@tonic-gate 	case CHR:			/* ordinary char: locate it fast */
568*0Sstevel@tonic-gate 		c = *(ap+1);
569*0Sstevel@tonic-gate 		while (*lp && *lp != c)
570*0Sstevel@tonic-gate 			lp++;
571*0Sstevel@tonic-gate 		if (!*lp)		/* if EOS, fail, else fall thru. */
572*0Sstevel@tonic-gate 			return 0;
573*0Sstevel@tonic-gate 	default:			/* regular matching all the way. */
574*0Sstevel@tonic-gate 		do {
575*0Sstevel@tonic-gate 			if ((ep = pmatch(lp,ap)))
576*0Sstevel@tonic-gate 				break;
577*0Sstevel@tonic-gate 			lp++;
578*0Sstevel@tonic-gate 		} while (*lp);
579*0Sstevel@tonic-gate 
580*0Sstevel@tonic-gate 		break;
581*0Sstevel@tonic-gate 	case END:			/* munged automaton. fail always */
582*0Sstevel@tonic-gate 		return 0;
583*0Sstevel@tonic-gate 	}
584*0Sstevel@tonic-gate 	if (!ep)
585*0Sstevel@tonic-gate 		return 0;
586*0Sstevel@tonic-gate 
587*0Sstevel@tonic-gate 	bopat[0] = lp;
588*0Sstevel@tonic-gate 	eopat[0] = ep;
589*0Sstevel@tonic-gate 	return 1;
590*0Sstevel@tonic-gate }
591*0Sstevel@tonic-gate 
592*0Sstevel@tonic-gate /*
593*0Sstevel@tonic-gate  * pmatch: internal routine for the hard part
594*0Sstevel@tonic-gate  *
595*0Sstevel@tonic-gate  * 	This code is partly snarfed from an early grep written by
596*0Sstevel@tonic-gate  *	David Conroy. The backref and tag stuff, and various other
597*0Sstevel@tonic-gate  *	innovations are by oz.
598*0Sstevel@tonic-gate  *
599*0Sstevel@tonic-gate  *	special case optimizations: (nfa[n], nfa[n+1])
600*0Sstevel@tonic-gate  *		CLO ANY
601*0Sstevel@tonic-gate  *			We KNOW .* will match everything upto the
602*0Sstevel@tonic-gate  *			end of line. Thus, directly go to the end of
603*0Sstevel@tonic-gate  *			line, without recursive pmatch calls. As in
604*0Sstevel@tonic-gate  *			the other closure cases, the remaining pattern
605*0Sstevel@tonic-gate  *			must be matched by moving backwards on the
606*0Sstevel@tonic-gate  *			string recursively, to find a match for xy
607*0Sstevel@tonic-gate  *			(x is ".*" and y is the remaining pattern)
608*0Sstevel@tonic-gate  *			where the match satisfies the LONGEST match for
609*0Sstevel@tonic-gate  *			x followed by a match for y.
610*0Sstevel@tonic-gate  *		CLO CHR
611*0Sstevel@tonic-gate  *			We can again scan the string forward for the
612*0Sstevel@tonic-gate  *			single char and at the point of failure, we
613*0Sstevel@tonic-gate  *			execute the remaining nfa recursively, same as
614*0Sstevel@tonic-gate  *			above.
615*0Sstevel@tonic-gate  *
616*0Sstevel@tonic-gate  *	At the end of a successful match, bopat[n] and eopat[n]
617*0Sstevel@tonic-gate  *	are set to the beginning and end of subpatterns matched
618*0Sstevel@tonic-gate  *	by tagged expressions (n = 1 to 9).
619*0Sstevel@tonic-gate  *
620*0Sstevel@tonic-gate  */
621*0Sstevel@tonic-gate 
622*0Sstevel@tonic-gate #ifndef re_fail
623*0Sstevel@tonic-gate extern void re_fail();
624*0Sstevel@tonic-gate #endif /* re_fail */
625*0Sstevel@tonic-gate 
626*0Sstevel@tonic-gate /*
627*0Sstevel@tonic-gate  * character classification table for word boundary operators BOW
628*0Sstevel@tonic-gate  * and EOW. the reason for not using ctype macros is that we can
629*0Sstevel@tonic-gate  * let the user add into our own table. see re_modw. This table
630*0Sstevel@tonic-gate  * is not in the bitset form, since we may wish to extend it in the
631*0Sstevel@tonic-gate  * future for other character classifications.
632*0Sstevel@tonic-gate  *
633*0Sstevel@tonic-gate  *	TRUE for 0-9 A-Z a-z _
634*0Sstevel@tonic-gate  */
635*0Sstevel@tonic-gate static char chrtyp[MAXCHR] = {
636*0Sstevel@tonic-gate 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
637*0Sstevel@tonic-gate 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638*0Sstevel@tonic-gate 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
639*0Sstevel@tonic-gate 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
640*0Sstevel@tonic-gate 	0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
641*0Sstevel@tonic-gate 	1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
642*0Sstevel@tonic-gate 	0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
643*0Sstevel@tonic-gate 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
644*0Sstevel@tonic-gate 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
645*0Sstevel@tonic-gate 	1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
646*0Sstevel@tonic-gate 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
647*0Sstevel@tonic-gate 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
648*0Sstevel@tonic-gate 	1, 1, 1, 0, 0, 0, 0, 0
649*0Sstevel@tonic-gate 	};
650*0Sstevel@tonic-gate 
651*0Sstevel@tonic-gate #define inascii(x)	(0177&(x))
652*0Sstevel@tonic-gate #define iswordc(x) 	chrtyp[inascii(x)]
653*0Sstevel@tonic-gate #define isinset(x,y) 	((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND])
654*0Sstevel@tonic-gate 
655*0Sstevel@tonic-gate /*
656*0Sstevel@tonic-gate  * skip values for CLO XXX to skip past the closure
657*0Sstevel@tonic-gate  */
658*0Sstevel@tonic-gate 
659*0Sstevel@tonic-gate #define ANYSKIP	2 	/* [CLO] ANY END ...	     */
660*0Sstevel@tonic-gate #define CHRSKIP	3	/* [CLO] CHR chr END ...     */
661*0Sstevel@tonic-gate #define CCLSKIP 18	/* [CLO] CCL 16bytes END ... */
662*0Sstevel@tonic-gate 
663*0Sstevel@tonic-gate static char *
664*0Sstevel@tonic-gate pmatch( char *lp, CHAR *ap)
665*0Sstevel@tonic-gate {
666*0Sstevel@tonic-gate 	register int op, c, n;
667*0Sstevel@tonic-gate 	register char *e;		/* extra pointer for CLO */
668*0Sstevel@tonic-gate 	register char *bp;		/* beginning of subpat.. */
669*0Sstevel@tonic-gate 	register char *ep;		/* ending of subpat..	 */
670*0Sstevel@tonic-gate 	char *are;			/* to save the line ptr. */
671*0Sstevel@tonic-gate 
672*0Sstevel@tonic-gate 	while ((op = *ap++) != END)
673*0Sstevel@tonic-gate 		switch(op) {
674*0Sstevel@tonic-gate 
675*0Sstevel@tonic-gate 		case CHR:
676*0Sstevel@tonic-gate 			if (*lp++ != *ap++)
677*0Sstevel@tonic-gate 				return 0;
678*0Sstevel@tonic-gate 			break;
679*0Sstevel@tonic-gate 		case ANY:
680*0Sstevel@tonic-gate 			if (!*lp++)
681*0Sstevel@tonic-gate 				return 0;
682*0Sstevel@tonic-gate 			break;
683*0Sstevel@tonic-gate 		case CCL:
684*0Sstevel@tonic-gate 			c = *lp++;
685*0Sstevel@tonic-gate 			if (!isinset(ap,c))
686*0Sstevel@tonic-gate 				return 0;
687*0Sstevel@tonic-gate 			ap += BITBLK;
688*0Sstevel@tonic-gate 			break;
689*0Sstevel@tonic-gate 		case BOL:
690*0Sstevel@tonic-gate 			if (lp != bol)
691*0Sstevel@tonic-gate 				return 0;
692*0Sstevel@tonic-gate 			break;
693*0Sstevel@tonic-gate 		case EOL:
694*0Sstevel@tonic-gate 			if (*lp)
695*0Sstevel@tonic-gate 				return 0;
696*0Sstevel@tonic-gate 			break;
697*0Sstevel@tonic-gate 		case BOT:
698*0Sstevel@tonic-gate 			bopat[*ap++] = lp;
699*0Sstevel@tonic-gate 			break;
700*0Sstevel@tonic-gate 		case EOT:
701*0Sstevel@tonic-gate 			eopat[*ap++] = lp;
702*0Sstevel@tonic-gate 			break;
703*0Sstevel@tonic-gate  		case BOW:
704*0Sstevel@tonic-gate 			if (lp!=bol && iswordc(lp[-1]) || !iswordc(*lp))
705*0Sstevel@tonic-gate 				return 0;
706*0Sstevel@tonic-gate 			break;
707*0Sstevel@tonic-gate 		case EOW:
708*0Sstevel@tonic-gate 			if (lp==bol || !iswordc(lp[-1]) || iswordc(*lp))
709*0Sstevel@tonic-gate 				return 0;
710*0Sstevel@tonic-gate 			break;
711*0Sstevel@tonic-gate 		case REF:
712*0Sstevel@tonic-gate 			n = *ap++;
713*0Sstevel@tonic-gate 			bp = bopat[n];
714*0Sstevel@tonic-gate 			ep = eopat[n];
715*0Sstevel@tonic-gate 			while (bp < ep)
716*0Sstevel@tonic-gate 				if (*bp++ != *lp++)
717*0Sstevel@tonic-gate 					return 0;
718*0Sstevel@tonic-gate 			break;
719*0Sstevel@tonic-gate 		case CLO:
720*0Sstevel@tonic-gate 			are = lp;
721*0Sstevel@tonic-gate 			switch(*ap) {
722*0Sstevel@tonic-gate 
723*0Sstevel@tonic-gate 			case ANY:
724*0Sstevel@tonic-gate 				while (*lp)
725*0Sstevel@tonic-gate 					lp++;
726*0Sstevel@tonic-gate 				n = ANYSKIP;
727*0Sstevel@tonic-gate 				break;
728*0Sstevel@tonic-gate 			case CHR:
729*0Sstevel@tonic-gate 				c = *(ap+1);
730*0Sstevel@tonic-gate 				while (*lp && c == *lp)
731*0Sstevel@tonic-gate 					lp++;
732*0Sstevel@tonic-gate 				n = CHRSKIP;
733*0Sstevel@tonic-gate 				break;
734*0Sstevel@tonic-gate 			case CCL:
735*0Sstevel@tonic-gate 				while ((c = *lp) && isinset(ap+1,c))
736*0Sstevel@tonic-gate 					lp++;
737*0Sstevel@tonic-gate 				n = CCLSKIP;
738*0Sstevel@tonic-gate 				break;
739*0Sstevel@tonic-gate 			default:
740*0Sstevel@tonic-gate 				re_fail("closure: bad nfa.", *ap);
741*0Sstevel@tonic-gate 				return 0;
742*0Sstevel@tonic-gate 			}
743*0Sstevel@tonic-gate 
744*0Sstevel@tonic-gate 			ap += n;
745*0Sstevel@tonic-gate 
746*0Sstevel@tonic-gate 			while (lp >= are) {
747*0Sstevel@tonic-gate 				if (e = pmatch(lp, ap))
748*0Sstevel@tonic-gate 					return e;
749*0Sstevel@tonic-gate 				--lp;
750*0Sstevel@tonic-gate 			}
751*0Sstevel@tonic-gate 			return 0;
752*0Sstevel@tonic-gate 		default:
753*0Sstevel@tonic-gate 			re_fail("re_exec: bad nfa.", op);
754*0Sstevel@tonic-gate 			return 0;
755*0Sstevel@tonic-gate 		}
756*0Sstevel@tonic-gate 	return lp;
757*0Sstevel@tonic-gate }
758*0Sstevel@tonic-gate 
759*0Sstevel@tonic-gate /*
760*0Sstevel@tonic-gate  * re_modw:
761*0Sstevel@tonic-gate  *	add new characters into the word table to change re_exec's
762*0Sstevel@tonic-gate  *	understanding of what a word should look like. Note that we
763*0Sstevel@tonic-gate  *	only accept additions into the word definition.
764*0Sstevel@tonic-gate  *
765*0Sstevel@tonic-gate  *	If the string parameter is 0 or null string, the table is
766*0Sstevel@tonic-gate  *	reset back to the default containing A-Z a-z 0-9 _. [We use
767*0Sstevel@tonic-gate  *	the compact bitset representation for the default table]
768*0Sstevel@tonic-gate  */
769*0Sstevel@tonic-gate 
770*0Sstevel@tonic-gate static CHAR deftab[16] = {
771*0Sstevel@tonic-gate 	0, 0, 0, 0, 0, 0, 0377, 003, 0376, 0377, 0377, 0207,
772*0Sstevel@tonic-gate 	0376, 0377, 0377, 007
773*0Sstevel@tonic-gate };
774*0Sstevel@tonic-gate 
775*0Sstevel@tonic-gate void
776*0Sstevel@tonic-gate re_modw( char *s )
777*0Sstevel@tonic-gate {
778*0Sstevel@tonic-gate 	register int i;
779*0Sstevel@tonic-gate 
780*0Sstevel@tonic-gate 	if (!s || !*s) {
781*0Sstevel@tonic-gate 		for (i = 0; i < MAXCHR; i++)
782*0Sstevel@tonic-gate 			if (!isinset(deftab,i))
783*0Sstevel@tonic-gate 				iswordc(i) = 0;
784*0Sstevel@tonic-gate 	}
785*0Sstevel@tonic-gate 	else
786*0Sstevel@tonic-gate 		while(*s)
787*0Sstevel@tonic-gate 			iswordc(*s++) = 1;
788*0Sstevel@tonic-gate }
789*0Sstevel@tonic-gate 
790*0Sstevel@tonic-gate /*
791*0Sstevel@tonic-gate  * re_subs:
792*0Sstevel@tonic-gate  *	substitute the matched portions of the src in dst.
793*0Sstevel@tonic-gate  *
794*0Sstevel@tonic-gate  *	&	substitute the entire matched pattern.
795*0Sstevel@tonic-gate  *
796*0Sstevel@tonic-gate  *	\digit	substitute a subpattern, with the given	tag number.
797*0Sstevel@tonic-gate  *		Tags are numbered from 1 to 9. If the particular
798*0Sstevel@tonic-gate  *		tagged subpattern does not exist, null is substituted.
799*0Sstevel@tonic-gate  */
800*0Sstevel@tonic-gate int
801*0Sstevel@tonic-gate re_subs( char *src, char *dst)
802*0Sstevel@tonic-gate {
803*0Sstevel@tonic-gate 	register char c;
804*0Sstevel@tonic-gate 	register int  pin;
805*0Sstevel@tonic-gate 	register char *bp;
806*0Sstevel@tonic-gate 	register char *ep;
807*0Sstevel@tonic-gate 
808*0Sstevel@tonic-gate 	if (!*src || !bopat[0])
809*0Sstevel@tonic-gate 		return 0;
810*0Sstevel@tonic-gate 
811*0Sstevel@tonic-gate 	while (c = *src++) {
812*0Sstevel@tonic-gate 		switch(c) {
813*0Sstevel@tonic-gate 
814*0Sstevel@tonic-gate 		case '&':
815*0Sstevel@tonic-gate 			pin = 0;
816*0Sstevel@tonic-gate 			break;
817*0Sstevel@tonic-gate 
818*0Sstevel@tonic-gate 		case '\\':
819*0Sstevel@tonic-gate 			c = *src++;
820*0Sstevel@tonic-gate 			if (c >= '0' && c <= '9') {
821*0Sstevel@tonic-gate 				pin = c - '0';
822*0Sstevel@tonic-gate 				break;
823*0Sstevel@tonic-gate 			}
824*0Sstevel@tonic-gate 
825*0Sstevel@tonic-gate 		default:
826*0Sstevel@tonic-gate 			*dst++ = c;
827*0Sstevel@tonic-gate 			continue;
828*0Sstevel@tonic-gate 		}
829*0Sstevel@tonic-gate 
830*0Sstevel@tonic-gate 		if ((bp = bopat[pin]) && (ep = eopat[pin])) {
831*0Sstevel@tonic-gate 			while (*bp && bp < ep)
832*0Sstevel@tonic-gate 				*dst++ = *bp++;
833*0Sstevel@tonic-gate 			if (bp < ep)
834*0Sstevel@tonic-gate 				return 0;
835*0Sstevel@tonic-gate 		}
836*0Sstevel@tonic-gate 	}
837*0Sstevel@tonic-gate 	*dst = (char) 0;
838*0Sstevel@tonic-gate 	return 1;
839*0Sstevel@tonic-gate }
840*0Sstevel@tonic-gate 
841*0Sstevel@tonic-gate #ifdef DEBUG
842*0Sstevel@tonic-gate /*
843*0Sstevel@tonic-gate  * symbolic - produce a symbolic dump of the nfa
844*0Sstevel@tonic-gate  */
845*0Sstevel@tonic-gate void
846*0Sstevel@tonic-gate symbolic( char *s )
847*0Sstevel@tonic-gate {
848*0Sstevel@tonic-gate 	(void) printf("pattern: %s\n", s);
849*0Sstevel@tonic-gate 	(void) printf("nfacode:\n");
850*0Sstevel@tonic-gate 	nfadump(nfa);
851*0Sstevel@tonic-gate }
852*0Sstevel@tonic-gate 
853*0Sstevel@tonic-gate static void
854*0Sstevel@tonic-gate nfadump( CHAR *ap)
855*0Sstevel@tonic-gate {
856*0Sstevel@tonic-gate 	register int n;
857*0Sstevel@tonic-gate 
858*0Sstevel@tonic-gate 	while (*ap != END)
859*0Sstevel@tonic-gate 		switch(*ap++) {
860*0Sstevel@tonic-gate 		case CLO:
861*0Sstevel@tonic-gate 			(void) printf("CLOSURE");
862*0Sstevel@tonic-gate 			nfadump(ap);
863*0Sstevel@tonic-gate 			switch(*ap) {
864*0Sstevel@tonic-gate 			case CHR:
865*0Sstevel@tonic-gate 				n = CHRSKIP;
866*0Sstevel@tonic-gate 				break;
867*0Sstevel@tonic-gate 			case ANY:
868*0Sstevel@tonic-gate 				n = ANYSKIP;
869*0Sstevel@tonic-gate 				break;
870*0Sstevel@tonic-gate 			case CCL:
871*0Sstevel@tonic-gate 				n = CCLSKIP;
872*0Sstevel@tonic-gate 				break;
873*0Sstevel@tonic-gate 			}
874*0Sstevel@tonic-gate 			ap += n;
875*0Sstevel@tonic-gate 			break;
876*0Sstevel@tonic-gate 		case CHR:
877*0Sstevel@tonic-gate 			(void) printf("\tCHR %c\n",*ap++);
878*0Sstevel@tonic-gate 			break;
879*0Sstevel@tonic-gate 		case ANY:
880*0Sstevel@tonic-gate 			(void) printf("\tANY .\n");
881*0Sstevel@tonic-gate 			break;
882*0Sstevel@tonic-gate 		case BOL:
883*0Sstevel@tonic-gate 			(void) printf("\tBOL -\n");
884*0Sstevel@tonic-gate 			break;
885*0Sstevel@tonic-gate 		case EOL:
886*0Sstevel@tonic-gate 			(void) printf("\tEOL -\n");
887*0Sstevel@tonic-gate 			break;
888*0Sstevel@tonic-gate 		case BOT:
889*0Sstevel@tonic-gate 			(void) printf("BOT: %d\n",*ap++);
890*0Sstevel@tonic-gate 			break;
891*0Sstevel@tonic-gate 		case EOT:
892*0Sstevel@tonic-gate 			(void) printf("EOT: %d\n",*ap++);
893*0Sstevel@tonic-gate 			break;
894*0Sstevel@tonic-gate 		case BOW:
895*0Sstevel@tonic-gate 			(void) printf("BOW\n");
896*0Sstevel@tonic-gate 			break;
897*0Sstevel@tonic-gate 		case EOW:
898*0Sstevel@tonic-gate 			(void) printf("EOW\n");
899*0Sstevel@tonic-gate 			break;
900*0Sstevel@tonic-gate 		case REF:
901*0Sstevel@tonic-gate 			(void) printf("REF: %d\n",*ap++);
902*0Sstevel@tonic-gate 			break;
903*0Sstevel@tonic-gate 		case CCL:
904*0Sstevel@tonic-gate 			(void) printf("\tCCL [");
905*0Sstevel@tonic-gate 			for (n = 0; n < MAXCHR; n++)
906*0Sstevel@tonic-gate 				if (isinset(ap,(CHAR)n)) {
907*0Sstevel@tonic-gate 					if (n < ' ')
908*0Sstevel@tonic-gate 						(void) printf("^%c", n ^ 0x040);
909*0Sstevel@tonic-gate 					else
910*0Sstevel@tonic-gate 						(void) printf("%c", n);
911*0Sstevel@tonic-gate 				}
912*0Sstevel@tonic-gate 			(void) printf("]\n");
913*0Sstevel@tonic-gate 			ap += BITBLK;
914*0Sstevel@tonic-gate 			break;
915*0Sstevel@tonic-gate 		default:
916*0Sstevel@tonic-gate 			(void) printf("bad nfa. opcode %o\n", ap[-1]);
917*0Sstevel@tonic-gate 			exit(1);
918*0Sstevel@tonic-gate 			break;
919*0Sstevel@tonic-gate 		}
920*0Sstevel@tonic-gate }
921*0Sstevel@tonic-gate #endif
922*0Sstevel@tonic-gate #endif /* MACOS or DOS or NEED_BSDREGEX */
923