xref: /onnv-gate/usr/src/lib/libast/common/regex/reglib.h (revision 12068:08a39a083754)
14887Schin /***********************************************************************
24887Schin *                                                                      *
34887Schin *               This software is part of the ast package               *
4*12068SRoger.Faulkner@Oracle.COM *          Copyright (c) 1985-2010 AT&T Intellectual Property          *
54887Schin *                      and is licensed under the                       *
64887Schin *                  Common Public License, Version 1.0                  *
78462SApril.Chin@Sun.COM *                    by AT&T Intellectual Property                     *
84887Schin *                                                                      *
94887Schin *                A copy of the License is available at                 *
104887Schin *            http://www.opensource.org/licenses/cpl1.0.txt             *
114887Schin *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
124887Schin *                                                                      *
134887Schin *              Information and Software Systems Research               *
144887Schin *                            AT&T Research                             *
154887Schin *                           Florham Park NJ                            *
164887Schin *                                                                      *
174887Schin *                 Glenn Fowler <gsf@research.att.com>                  *
184887Schin *                  David Korn <dgk@research.att.com>                   *
194887Schin *                   Phong Vo <kpv@research.att.com>                    *
204887Schin *                                                                      *
214887Schin ***********************************************************************/
224887Schin #pragma prototyped
234887Schin 
244887Schin /*
254887Schin  * posix regex implementation
264887Schin  *
274887Schin  * based on Doug McIlroy's C++ implementation
284887Schin  * Knuth-Morris-Pratt adapted from Cormen-Leiserson-Rivest
294887Schin  * Boyer-Moore from conversations with David Korn, Phong Vo, Andrew Hume
304887Schin  */
314887Schin 
324887Schin #ifndef _REGLIB_H
334887Schin #define _REGLIB_H
344887Schin 
/*
 * Version constants, followed by library-private name remapping.
 * After these defines the short names re_info, alloc, classfun, drop,
 * fatal and state expand to env, _reg_alloc, _reg_classfun, _reg_drop,
 * _reg_fatal and _reg_state respectively, so every later use of the
 * short names in this header (including the extern declarations at the
 * bottom) refers to the remapped identifiers.
 * NOTE(review): the version values look like yyyymmdd stamps -- confirm.
 */
354887Schin #define REG_VERSION_EXEC	20020509L
364887Schin #define REG_VERSION_MAP		20030916L	/* regdisc_t.re_map	*/
374887Schin 
384887Schin #define re_info		env
394887Schin 
404887Schin #define alloc		_reg_alloc
414887Schin #define classfun	_reg_classfun
424887Schin #define drop		_reg_drop
434887Schin #define fatal		_reg_fatal
444887Schin #define state		_reg_state
454887Schin 
/*
 * One substitution operation.  Per the field notes, off/len select
 * either a span of re_rhs (len is the span length) or, when len==0,
 * an entry of match[]; op carries a case-conversion request.
 */
464887Schin typedef struct regsubop_s
474887Schin {
484887Schin 	int		op;		/* REG_SUB_LOWER,REG_SUB_UPPER	*/
494887Schin 	int		off;		/* re_rhs or match[] offset	*/
504887Schin 	int		len;		/* re_rhs len or len==0 match[]	*/
514887Schin } regsubop_t;
524887Schin 
/*
 * Member declarations packaged as a macro: a buffer cursor/end pair,
 * the op list and a one-element trailing char array for the rhs text.
 * It is defined before "regex.h" is included below -- presumably so
 * that header can splice these members into its public substitution
 * struct; confirm in regex.h.
 */
534887Schin #define _REG_SUB_PRIVATE_ \
544887Schin 	char*		re_cur;		/* re_buf cursor		*/ \
554887Schin 	char*		re_end;		/* re_buf end			*/ \
564887Schin 	regsubop_t*	re_ops;		/* rhs ops			*/ \
574887Schin 	char		re_rhs[1];	/* substitution rhs		*/
584887Schin 
594887Schin #include <ast.h>
604887Schin #include <cdt.h>
614887Schin #include <stk.h>
624887Schin 
634887Schin #include "regex.h"
644887Schin 
654887Schin #include <ctype.h>
664887Schin #include <errno.h>
674887Schin 
/*
 * Default _AST_REGEX_DEBUG to 1 when _BLD_DEBUG is set, unless the
 * builder already defined _AST_REGEX_DEBUG.
 */
6810898Sroland.mainz@nrubsig.org #if _BLD_DEBUG && !defined(_AST_REGEX_DEBUG)
6910898Sroland.mainz@nrubsig.org #define _AST_REGEX_DEBUG	1
7010898Sroland.mainz@nrubsig.org #endif
7110898Sroland.mainz@nrubsig.org 
/*
 * MBSIZE(p): the value of mbsize(p), except that a result <= 0 yields 1.
 * The result is parked in ast.tmp_int so that p is expanded only once.
 * NOTE(review): ast.tmp_int looks like shared scratch storage -- confirm
 * whether that matters for concurrent callers.
 */
724887Schin #define MBSIZE(p)	((ast.tmp_int=mbsize(p))>0?ast.tmp_int:1)
734887Schin 
/*
 * Repetition-count limits.  RE_DUP_INF is one past RE_DUP_MAX and stands
 * for an unbounded count.  BACK_REF_MAX is presumably the highest \N
 * backreference number accepted -- confirm at its use sites.
 */
744887Schin #undef	RE_DUP_MAX			/* posix puts this in limits.h!	*/
754887Schin #define RE_DUP_MAX	(INT_MAX/2-1)	/* 2*RE_DUP_MAX won't overflow	*/
764887Schin #define RE_DUP_INF	(RE_DUP_MAX+1)	/* infinity, for *		*/
774887Schin #define BACK_REF_MAX	9
784887Schin 
/*
 * REG_COMP is the union of the REG_ flag bits listed on its line and
 * REG_EXEC the union of the five bits on its line.  Presumably they are
 * masks separating compile-time from exec-time flags -- confirm at the
 * regcomp()/regexec() call sites.
 */
798462SApril.Chin@Sun.COM #define REG_COMP	(REG_DELIMITED|REG_ESCAPE|REG_EXTENDED|REG_FIRST|REG_ICASE|REG_NOSUB|REG_NEWLINE|REG_SHELL|REG_AUGMENTED|REG_LEFT|REG_LITERAL|REG_MINIMAL|REG_MULTIREF|REG_NULL|REG_RIGHT|REG_LENIENT|REG_MUSTDELIM)
804887Schin #define REG_EXEC	(REG_ADVANCE|REG_INVERT|REG_NOTBOL|REG_NOTEOL|REG_STARTEND)
814887Schin 
/*
 * Expression node kinds, numbered densely from 0 to 44.  Entries whose
 * note says "catcher" pair with the node kind named in that note.
 * Presumably these are the values stored in Rex_t.type (an unsigned
 * char, so the range fits) -- confirm in the compiler/matcher.
 */
824887Schin #define REX_NULL		0	/* null string (internal)	*/
834887Schin #define REX_ALT			1	/* a|b				*/
844887Schin #define REX_ALT_CATCH		2	/* REX_ALT catcher		*/
854887Schin #define REX_BACK		3	/* \1, \2, etc			*/
864887Schin #define REX_BEG			4	/* initial ^			*/
874887Schin #define REX_BEG_STR		5	/* initial ^ w/ no newline	*/
884887Schin #define REX_BM			6	/* Boyer-Moore			*/
894887Schin #define REX_CAT			7	/* catenation catcher		*/
904887Schin #define REX_CLASS		8	/* [...]			*/
914887Schin #define REX_COLL_CLASS		9	/* collation order [...]	*/
924887Schin #define REX_CONJ		10	/* a&b				*/
934887Schin #define REX_CONJ_LEFT		11	/* REX_CONJ left catcher	*/
944887Schin #define REX_CONJ_RIGHT		12	/* REX_CONJ right catcher	*/
954887Schin #define REX_DONE		13	/* completed match (internal)	*/
964887Schin #define REX_DOT			14	/* .				*/
974887Schin #define REX_END			15	/* final $			*/
984887Schin #define REX_END_STR		16	/* final $ before tail newline	*/
994887Schin #define REX_EXEC		17	/* call re.re_exec()		*/
1004887Schin #define REX_FIN_STR		18	/* final $ w/ no newline	*/
1014887Schin #define REX_GROUP		19	/* \(...\)			*/
1024887Schin #define REX_GROUP_CATCH		20	/* REX_GROUP catcher		*/
1034887Schin #define REX_GROUP_AHEAD		21	/* 0-width lookahead		*/
1044887Schin #define REX_GROUP_AHEAD_CATCH	22	/* REX_GROUP_AHEAD catcher	*/
1054887Schin #define REX_GROUP_AHEAD_NOT	23	/* inverted 0-width lookahead	*/
1064887Schin #define REX_GROUP_BEHIND	24	/* 0-width lookbehind		*/
1074887Schin #define REX_GROUP_BEHIND_CATCH	25	/* REX_GROUP_BEHIND catcher	*/
1084887Schin #define REX_GROUP_BEHIND_NOT	26	/* inverted 0-width lookbehind	*/
1094887Schin #define REX_GROUP_BEHIND_NOT_CATCH 27	/* REX_GROUP_BEHIND_NOT catcher	*/
1104887Schin #define REX_GROUP_COND		28	/* conditional group		*/
1114887Schin #define REX_GROUP_COND_CATCH	29	/* conditional group catcher	*/
1124887Schin #define REX_GROUP_CUT		30	/* don't backtrack over this	*/
1134887Schin #define REX_GROUP_CUT_CATCH	31	/* REX_GROUP_CUT catcher	*/
1144887Schin #define REX_KMP			32	/* Knuth-Morris-Pratt		*/
1154887Schin #define REX_NEG			33	/* negation			*/
1164887Schin #define REX_NEG_CATCH		34	/* REX_NEG catcher		*/
1174887Schin #define REX_NEST		35	/* nested match			*/
1184887Schin #define REX_ONECHAR		36	/* a single-character literal	*/
1194887Schin #define REX_REP			37	/* Kleene closure		*/
1204887Schin #define REX_REP_CATCH		38	/* REX_REP catcher		*/
1214887Schin #define REX_STRING		39	/* some chars			*/
1224887Schin #define REX_TRIE		40	/* alternation of strings	*/
1234887Schin #define REX_WBEG		41	/* \<				*/
1244887Schin #define REX_WEND		42	/* \>				*/
1254887Schin #define REX_WORD		43	/* word boundary		*/
1264887Schin #define REX_WORD_NOT		44	/* not word boundary		*/
1274887Schin 
/*
 * T_ codes.  Every value is at least T_META == UCHAR_MAX+1, so none can
 * collide with a plain unsigned char value.  The list is built as four
 * chained runs: offsets from T_META, then from T_OPEN, then from T_WORD,
 * then from T_ALNUM; each run's base is defined by the previous run, so
 * inserting a code shifts everything after it.
 * NOTE(review): presumably these are the pattern lexer's token codes --
 * confirm in the compiler source.
 */
1284887Schin #define T_META		((int)UCHAR_MAX+1)
1294887Schin #define T_STAR		(T_META+0)
1304887Schin #define T_PLUS		(T_META+1)
1314887Schin #define T_QUES		(T_META+2)
1324887Schin #define T_BANG		(T_META+3)
1334887Schin #define T_AT		(T_META+4)
1344887Schin #define T_TILDE		(T_META+5)
1354887Schin #define T_PERCENT	(T_META+6)
1364887Schin #define T_LEFT		(T_META+7)
1374887Schin #define T_OPEN		(T_META+8)
1384887Schin #define T_CLOSE		(T_OPEN+1)
1394887Schin #define T_RIGHT		(T_OPEN+2)
1404887Schin #define T_CFLX		(T_OPEN+3)
1414887Schin #define T_DOT		(T_OPEN+4)
1424887Schin #define T_DOTSTAR	(T_OPEN+5)
1434887Schin #define T_END		(T_OPEN+6)
1444887Schin #define T_BAD		(T_OPEN+7)
1454887Schin #define T_DOLL		(T_OPEN+8)
1464887Schin #define T_BRA		(T_OPEN+9)
1474887Schin #define T_BAR		(T_OPEN+10)
1484887Schin #define T_AND		(T_OPEN+11)
1494887Schin #define T_LT		(T_OPEN+12)
1504887Schin #define T_GT		(T_OPEN+13)
1514887Schin #define T_SLASHPLUS	(T_OPEN+14)
1524887Schin #define T_GROUP		(T_OPEN+15)
1534887Schin #define T_WORD		(T_OPEN+16)
1544887Schin #define T_WORD_NOT	(T_WORD+1)
1554887Schin #define T_BEG_STR	(T_WORD+2)
1564887Schin #define T_END_STR	(T_WORD+3)
1574887Schin #define T_FIN_STR	(T_WORD+4)
1584887Schin #define T_ESCAPE	(T_WORD+5)
1594887Schin #define T_ALNUM		(T_WORD+6)
1604887Schin #define T_ALNUM_NOT	(T_ALNUM+1)
1614887Schin #define T_DIGIT		(T_ALNUM+2)
1624887Schin #define T_DIGIT_NOT	(T_ALNUM+3)
1634887Schin #define T_SPACE		(T_ALNUM+4)
1644887Schin #define T_SPACE_NOT	(T_ALNUM+5)
1654887Schin #define T_BACK		(T_ALNUM+6)
1664887Schin 
/*
 * Five ids spaced three apart (0, 3, 6, 9, 12).
 * NOTE(review): presumably one id per pattern flavor, usable as the base
 * index of a three-entry group in a per-flavor table -- confirm where
 * they are used.
 */
1674887Schin #define BRE		0
1684887Schin #define ERE		3
1694887Schin #define ARE		6
1704887Schin #define SRE		9
1714887Schin #define KRE		12
1724887Schin 
/* HIT is SSIZE_MAX; presumably a sentinel value -- confirm at use sites. */
1734887Schin #define HIT		SSIZE_MAX
1744887Schin 
/*
 * Bit-vector helpers over a byte array p.  The byte index is
 * ((c)>>3)&037, i.e. always 0..31, and the bit within the byte is
 * (c)&07, so the macros address a 256-bit set stored in 32 bytes and
 * silently wrap any c outside 0..255.  bitclr/bitset modify p in place;
 * bittst yields non-zero when the bit is set (the masked byte, not 0/1).
 * c is expanded twice, so it must not have side effects.
 */
1754887Schin #define bitclr(p,c)	((p)[((c)>>3)&037]&=(~(1<<((c)&07))))
1764887Schin #define bitset(p,c)	((p)[((c)>>3)&037]|=(1<<((c)&07)))
1774887Schin #define bittst(p,c)	((p)[((c)>>3)&037]&(1<<((c)&07)))
1784887Schin 
/* The same three operations applied to the bits member of the object p points to. */
1794887Schin #define setadd(p,c)	bitset((p)->bits,c)
1804887Schin #define setclr(p,c)	bitclr((p)->bits,c)
1814887Schin #define settst(p,c)	bittst((p)->bits,c)
1824887Schin 
/*
 * Wide-character classification compatibility layer.
 *
 * First branch: _hdr_wchar, _lib_wctype and _lib_iswctype are all set.
 * The system headers are included and only missing pieces are filled in:
 * iswblank falls back to _reg_iswblank() (and _need_iswblank is set),
 * towupper/towlower fall back to toupper/tolower.
 *
 * Second branch: _lib_wctype is undefined and each iswXXX / towXXX name
 * that is not already a macro is mapped onto its single-byte <ctype.h>
 * counterpart.
 */
1834887Schin #if _hdr_wchar && _lib_wctype && _lib_iswctype
1844887Schin 
1854887Schin #include <stdio.h> /* because <wchar.h> includes it and we generate it */
1864887Schin #include <wchar.h>
1874887Schin #if _hdr_wctype
1884887Schin #include <wctype.h>
1894887Schin #endif
1904887Schin 
1914887Schin #if !defined(iswblank) && !_lib_iswblank
1924887Schin #define _need_iswblank	1
1934887Schin #define iswblank(x)	_reg_iswblank(x)
1944887Schin extern int		_reg_iswblank(wint_t);
1954887Schin #endif
1964887Schin 
1974887Schin #if !defined(towupper) && !_lib_towupper
1984887Schin #define towupper(x)	toupper(x)
1994887Schin #endif
2004887Schin 
2014887Schin #if !defined(towlower) && !_lib_towlower
2024887Schin #define towlower(x)	tolower(x)
2034887Schin #endif
2044887Schin 
2054887Schin #else
2064887Schin 
2074887Schin #undef	_lib_wctype
2084887Schin 
2094887Schin #ifndef iswalnum
2104887Schin #define iswalnum(x)	isalnum(x)
2114887Schin #endif
2124887Schin #ifndef iswalpha
2134887Schin #define iswalpha(x)	isalpha(x)
2144887Schin #endif
2154887Schin #ifndef iswcntrl
2164887Schin #define iswcntrl(x)	iscntrl(x)
2174887Schin #endif
2184887Schin #ifndef iswdigit
2194887Schin #define iswdigit(x)	isdigit(x)
2204887Schin #endif
2214887Schin #ifndef iswgraph
2224887Schin #define iswgraph(x)	isgraph(x)
2234887Schin #endif
2244887Schin #ifndef iswlower
2254887Schin #define iswlower(x)	islower(x)
2264887Schin #endif
2274887Schin #ifndef iswprint
2284887Schin #define iswprint(x)	isprint(x)
2294887Schin #endif
2304887Schin #ifndef iswpunct
2314887Schin #define iswpunct(x)	ispunct(x)
2324887Schin #endif
2334887Schin #ifndef iswspace
2344887Schin #define iswspace(x)	isspace(x)
2354887Schin #endif
2364887Schin #ifndef iswupper
2374887Schin #define iswupper(x)	isupper(x)
2384887Schin #endif
2394887Schin #ifndef iswxdigit
2404887Schin #define iswxdigit(x)	isxdigit(x)
2414887Schin #endif
2424887Schin 
2434887Schin #ifndef towlower
2444887Schin #define towlower(x)	tolower(x)
2454887Schin #endif
2464887Schin #ifndef towupper
2474887Schin #define towupper(x)	toupper(x)
2484887Schin #endif
2494887Schin 
2504887Schin #endif
2514887Schin 
/*
 * Last-resort definitions, applied in either branch whenever the name is
 * still not a macro at this point: iswblank accepts only space and tab,
 * iswgraph is "printable and not blank".
 * NOTE(review): the test is #ifndef, so a platform that provides iswblank
 * only as a function (with _lib_iswblank set) also gets the space/tab
 * macro here -- confirm that is intended.
 */
2524887Schin #ifndef	iswblank
2534887Schin #define	iswblank(x)	((x)==' '||(x)=='\t')
2544887Schin #endif
2554887Schin 
2564887Schin #ifndef iswgraph
2574887Schin #define	iswgraph(x)	(iswprint(x)&&!iswblank(x))
2584887Schin #endif
2594887Schin 
/* Word character: isalnum(x) or underscore.  x is expanded twice. */
2604887Schin #define isword(x)	(isalnum(x)||(x)=='_')
2614887Schin 
2624887Schin /*
2634887Schin  * collation element support
2644887Schin  */
2654887Schin 
/*
 * COLL_KEY_MAX starts at 32 and is raised to MB_LEN_MAX when that is
 * larger.  Ckey_t is a byte buffer of COLL_KEY_MAX+1 bytes; the extra
 * byte is presumably room for a terminator -- confirm at use sites.
 */
2668462SApril.Chin@Sun.COM #define COLL_KEY_MAX	32
2674887Schin 
2684887Schin #if COLL_KEY_MAX < MB_LEN_MAX
2694887Schin #undef	COLL_KEY_MAX
2704887Schin #define COLL_KEY_MAX	MB_LEN_MAX
2714887Schin #endif
2724887Schin 
2734887Schin typedef unsigned char Ckey_t[COLL_KEY_MAX+1];
2744887Schin 
/*
 * COLL_ codes (0..5) followed by the collation element record.
 * NOTE(review): presumably Celt_t.typ holds one of the COLL_ codes, with
 * COLL_end marking the end of an element list, fun used for COLL_call
 * and beg/end giving the key bounds for the range kinds -- none of that
 * is shown in this header; confirm in the compiler/matcher.
 */
2754887Schin #define COLL_end	0
2764887Schin #define COLL_call	1
2774887Schin #define COLL_char	2
2784887Schin #define COLL_range	3
2794887Schin #define COLL_range_lc	4
2804887Schin #define COLL_range_uc	5
2814887Schin 
2824887Schin typedef struct Celt_s
2834887Schin {
2844887Schin 	short		typ;
2854887Schin 	short		min;
2864887Schin 	short		max;
2874887Schin 	regclass_t	fun;
2884887Schin 	Ckey_t		beg;
2894887Schin 	Ckey_t		end;
2904887Schin } Celt_t;
2914887Schin 
2924887Schin /*
2934887Schin  * private stuff hanging off regex_t
2944887Schin  */
2954887Schin 
/*
 * A stack position recorded as an offset plus a base pointer.
 * Env_t.stk has this type and is labelled "exec stack pos".
 */
2964887Schin typedef struct Stk_pos_s
2974887Schin {
2984887Schin 	off_t		offset;
2994887Schin 	char*		base;
3004887Schin } Stk_pos_t;
3014887Schin 
/*
 * Array of fixed-size elements (siz bytes each) held in vec, with a
 * growth increment and an associated Stk_t; see the field notes.
 */
3024887Schin typedef struct Vector_s
3034887Schin {
3044887Schin 	Stk_t*		stk;		/* stack pointer		*/
3054887Schin 	char*		vec;		/* the data			*/
3064887Schin 	int		inc;		/* growth increment		*/
3074887Schin 	int		siz;		/* element size			*/
3084887Schin 	int		max;		/* max index			*/
3094887Schin 	int		cur;		/* current index -- user domain	*/
3104887Schin } Vector_t;
3114887Schin 
3124887Schin /*
3134887Schin  * Rex_t subtypes
3144887Schin  */
3154887Schin 
/*
 * Payload of the conditional-group catcher (Rex_t.re.cond_catch).
 * next[] is indexed by the outcome of the condition: 0 = no, 1 = yes.
 */
3164887Schin typedef struct Cond_s
3174887Schin {
3184887Schin 	unsigned char*	beg;		/* beginning of next match	*/
3194887Schin 	struct Rex_s*	next[2];	/* 0:no 1:yes next pattern	*/
3204887Schin 	struct Rex_s*	cont;		/* right catcher		*/
3214887Schin 	int		yes;		/* yes condition hit		*/
3224887Schin } Cond_t;
3234887Schin 
/*
 * Payload of the conjunction (a&b) left catcher (Rex_t.re.conj_left):
 * where the left match began, the right-hand pattern and its catcher.
 */
3244887Schin typedef struct Conj_left_s
3254887Schin {
3264887Schin 	unsigned char*	beg;		/* beginning of left match	*/
3274887Schin 	struct Rex_s*	right;		/* right pattern		*/
3284887Schin 	struct Rex_s*	cont;		/* right catcher		*/
3294887Schin } Conj_left_t;
3304887Schin 
/*
 * Payload of the conjunction (a&b) right catcher (Rex_t.re.conj_right):
 * where the left match ended and the continuation to resume.
 */
3314887Schin typedef struct Conj_right_s
3324887Schin {
3334887Schin 	unsigned char*	end;		/* end of left match		*/
3344887Schin 	struct Rex_s*	cont;		/* ambient continuation		*/
3354887Schin } Conj_right_t;
3364887Schin 
/* Element type of the tables that Bm_t.mask points to. */
3374887Schin typedef unsigned int Bm_mask_t;
3384887Schin 
/*
 * Payload of a Boyer-Moore node (Rex_t.re.bm, node kind REX_BM).
 * NOTE(review): this header does not document the individual fields;
 * mask/skip/fail look like the search tables and size like the pattern
 * length, but confirm against the code that builds and uses them.
 */
3394887Schin typedef struct Bm_s
3404887Schin {
3414887Schin 	Bm_mask_t**	mask;
3424887Schin 	size_t*		skip;
3434887Schin 	size_t*		fail;
3444887Schin 	size_t		size;
3454887Schin 	ssize_t		back;
3464887Schin 	ssize_t		left;
3474887Schin 	ssize_t		right;
3484887Schin 	size_t		complete;
3494887Schin } Bm_t;
3504887Schin 
/*
 * Payload shared by string and KMP nodes (Rex_t.re.string, labelled
 * "string/kmp"): size bytes starting at base.
 * NOTE(review): fail is presumably the KMP failure table -- confirm.
 */
3514887Schin typedef struct String_s
3524887Schin {
3534887Schin 	int*		fail;
3544887Schin 	unsigned char*	base;
3554887Schin 	size_t		size;
3564887Schin } String_t;
3574887Schin 
/*
 * Bitmap with (UCHAR_MAX+1)/CHAR_BIT bytes, i.e. one bit for each
 * possible unsigned char value.  Rex_t.re.charclass points to one.
 */
3584887Schin typedef struct Set_s
3594887Schin {
3604887Schin 	unsigned char	bits[(UCHAR_MAX+1)/CHAR_BIT];
3614887Schin } Set_t;
3624887Schin 
/*
 * Payload of a collation class node (Rex_t.re.collate): an invert flag
 * and a pointer to Celt_t elements.
 * NOTE(review): how the element list is terminated is not shown here.
 */
3634887Schin typedef struct Collate_s
3644887Schin {
3654887Schin 	int		invert;
3664887Schin 	Celt_t*		elements;
3674887Schin } Collate_t;
3684887Schin 
/*
 * Two operand sub-expressions plus a serial number; used as the binary
 * member of Group_t.expr.
 */
3694887Schin typedef struct Binary_s
3704887Schin {
3714887Schin 	struct Rex_s*	left;
3724887Schin 	struct Rex_s*	right;
3734887Schin 	int		serial;
3744887Schin } Binary_t;
3754887Schin 
/*
 * Payload of group-like nodes (Rex_t.re.group, labelled "a|b or rep").
 * expr is a union: binary holds two operands, rex holds a single
 * sub-expression; which member is live is not recorded in this struct.
 */
3764887Schin typedef struct Group_s
3774887Schin {
3784887Schin 	int		number;		/* group number			*/
3794887Schin 	int		last;		/* last contained group number	*/
3804887Schin 	int		size;		/* lookbehind size		*/
3814887Schin 	int		back;		/* backreferenced		*/
3824887Schin 	regflags_t	flags;		/* group flags			*/
3834887Schin 	union
3844887Schin 	{
3854887Schin 	Binary_t	binary;
3864887Schin 	struct Rex_s*	rex;
3874887Schin 	}		expr;
3884887Schin } Group_t;
3894887Schin 
/*
 * Payload of a REX_EXEC node (Rex_t.re.exec, labelled "re.re_exec()
 * args"): an opaque data pointer plus a text pointer and size.
 */
3904887Schin typedef struct Exec_s
3914887Schin {
3924887Schin 	void*		data;
3934887Schin 	const char*	text;
3944887Schin 	size_t		size;
3954887Schin } Exec_t;
3964887Schin 
/*
 * Single-bit flags occupying the low eight bits (0x01..0x80), and a
 * shift of 8.
 * NOTE(review): presumably the flags classify a character in
 * Nest_t.type[] and REX_NEST_SHIFT positions extra data above them --
 * confirm in the nested-match code.
 */
3974887Schin #define REX_NEST_open		0x01
3984887Schin #define REX_NEST_close		0x02
3994887Schin #define REX_NEST_escape		0x04
4004887Schin #define REX_NEST_quote		0x08
4014887Schin #define REX_NEST_literal	0x10
4024887Schin #define REX_NEST_delimiter	0x20
4034887Schin #define REX_NEST_terminator	0x40
4044887Schin #define REX_NEST_separator	0x80
4054887Schin 
4064887Schin #define REX_NEST_SHIFT		8
4074887Schin 
/*
 * Payload of a nested-match node (Rex_t.re.nest).  none is declared
 * directly before type[] so that index -1 of type has a home, per its
 * note.  type is declared with one element.
 * NOTE(review): presumably the node is allocated with room for more
 * type[] entries than the one declared -- confirm at the allocation.
 */
4084887Schin typedef struct Nest_s
4094887Schin {
4104887Schin 	int		primary;
4114887Schin 	unsigned short	none;		/* for Nest_t.type[-1] */
4124887Schin 	unsigned short	type[1];
4134887Schin } Nest_t;
4144887Schin 
4154887Schin /*
4164887Schin  * REX_ALT catcher, solely to get control at the end of an
4174887Schin  * alternative to keep records for comparing matches.
4184887Schin  */
4194887Schin 
/* Payload of the REX_ALT catcher (Rex_t.re.alt_catch): only the continuation. */
4204887Schin typedef struct Alt_catch_s
4214887Schin {
4224887Schin 	struct Rex_s*	cont;
4234887Schin } Alt_catch_t;
4244887Schin 
/*
 * Payload of the group catcher (Rex_t.re.group_catch): the continuation
 * and a pointer to a regoff_t.
 * NOTE(review): eo is presumably the group's end-offset slot -- confirm.
 */
4254887Schin typedef struct Group_catch_s
4264887Schin {
4274887Schin 	struct Rex_s*	cont;
4284887Schin 	regoff_t*	eo;
4294887Schin } Group_catch_t;
4304887Schin 
/*
 * Payload of the lookbehind catcher (Rex_t.re.behind_catch): the
 * continuation and a beg/end pair of subject pointers.
 */
4314887Schin typedef struct Behind_catch_s
4324887Schin {
4334887Schin 	struct Rex_s*	cont;
4344887Schin 	unsigned char*	beg;
4354887Schin 	unsigned char*	end;
4364887Schin } Behind_catch_t;
4374887Schin 
4384887Schin /*
4394887Schin  * REX_NEG catcher determines what string lengths can be matched,
4404887Schin  * then Neg investigates continuations of other lengths.
4414887Schin  * This is inefficient.  For !POSITIONS expressions, we can do better:
4424887Schin  * since matches to rex will be enumerated in decreasing order,
4434887Schin  * we can investigate continuations whenever a length is skipped.
4444887Schin  */
4454887Schin 
/*
 * Payload of the REX_NEG catcher (Rex_t.re.neg_catch).
 * NOTE(review): index is presumably the record of which match lengths
 * were seen, relative to beg -- confirm in the matcher.
 */
4464887Schin typedef struct Neg_catch_s
4474887Schin {
4484887Schin 	unsigned char*	beg;
4494887Schin 	unsigned char*	index;
4504887Schin } Neg_catch_t;
4514887Schin 
4524887Schin /*
4534887Schin  * REX_REP catcher.  One is created on the stack for
4544887Schin  * each iteration of a complex repetition.
4554887Schin  */
4564887Schin 
/*
 * Payload of the REX_REP catcher (Rex_t.re.rep_catch).
 * NOTE(review): ref presumably points back to the repetition node, beg
 * to where this iteration started and n to the iteration count --
 * confirm in the matcher.
 */
4574887Schin typedef struct Rep_catch_s
4584887Schin {
4594887Schin 	struct Rex_s*	cont;
4604887Schin 	struct Rex_s*	ref;
4614887Schin 	unsigned char*	beg;
4624887Schin 	int		n;
4634887Schin } Rep_catch_t;
4644887Schin 
4654887Schin /*
4664887Schin  * data structure for an alternation of pure strings
4674887Schin  * son points to a subtree of all strings with a common
4684887Schin  * prefix ending in character c.  sib links alternate
4694887Schin  * letters in the same position of a word.  end=1 if
4704887Schin  * some word ends with c.  the order of strings is
4714887Schin  * irrelevant, except long words must be investigated
4724887Schin  * before short ones.
4734887Schin  */
4744887Schin 
/*
 * One trie node: c is this node's character, end is set when some word
 * ends at c, son leads to the strings continuing after c and sib to the
 * alternative characters at the same position.
 */
4754887Schin typedef struct Trie_node_s
4764887Schin {
4774887Schin 	unsigned char		c;
4784887Schin 	unsigned char		end;
4794887Schin 	struct Trie_node_s*	son;
4804887Schin 	struct Trie_node_s*	sib;
4814887Schin } Trie_node_t;
4824887Schin 
/*
 * Payload of a trie node (Rex_t.re.trie): an array of root node
 * pointers plus min and max values.
 * NOTE(review): presumably root is indexed by first character and
 * min/max are the shortest/longest word lengths -- confirm.
 */
4834887Schin typedef struct Trie_s
4844887Schin {
4854887Schin 	Trie_node_t**	root;
4864887Schin 	int		min;
4874887Schin 	int		max;
4884887Schin } Trie_t;
4894887Schin 
4904887Schin /*
4914887Schin  * Rex_t is a node in a regular expression
4924887Schin  */
4934887Schin 
/*
 * One node of a compiled expression.  The leading members are common to
 * every node; re is a union with one member per payload kind, so only
 * the member matching the node's kind is meaningful.  Nodes are chained
 * through next ("remaining parts").
 * NOTE(review): type presumably holds one of the REX_ codes defined
 * earlier in this header -- confirm in the compiler.
 */
4944887Schin typedef struct Rex_s
4954887Schin {
4964887Schin 	unsigned char	type;			/* node type		*/
4974887Schin 	unsigned char	marked;			/* already marked	*/
4984887Schin 	short		serial;			/* subpattern number	*/
4994887Schin 	regflags_t	flags;			/* scoped flags		*/
5004887Schin 	int		explicit;		/* scoped explicit match*/
5014887Schin 	struct Rex_s*	next;			/* remaining parts	*/
5024887Schin 	int		lo;			/* lo dup count		*/
5034887Schin 	int		hi;			/* hi dup count		*/
5044887Schin 	unsigned char*	map;			/* fold and/or ccode map*/
5054887Schin 	union
5064887Schin 	{
5074887Schin 	Alt_catch_t	alt_catch;		/* alt catcher		*/
5084887Schin 	Bm_t		bm;			/* bm			*/
5094887Schin 	Behind_catch_t	behind_catch;		/* behind catcher	*/
5104887Schin 	Set_t*		charclass;		/* char class		*/
5114887Schin 	Collate_t	collate;		/* collation class	*/
5124887Schin 	Cond_t		cond_catch;		/* cond catcher		*/
5134887Schin 	Conj_left_t	conj_left;		/* conj left catcher	*/
5144887Schin 	Conj_right_t	conj_right;		/* conj right catcher	*/
5154887Schin 	void*		data;			/* data after Rex_t	*/
5164887Schin 	Exec_t		exec;			/* re.re_exec() args	*/
5174887Schin 	Group_t		group;			/* a|b or rep		*/
5184887Schin 	Group_catch_t	group_catch;		/* group catcher	*/
5194887Schin 	Neg_catch_t	neg_catch;		/* neg catcher		*/
5204887Schin 	Nest_t		nest;			/* nested match		*/
5214887Schin 	unsigned char	onechar;		/* single char		*/
5224887Schin 	Rep_catch_t	rep_catch;		/* rep catcher		*/
5234887Schin 	String_t	string;			/* string/kmp		*/
5244887Schin 	Trie_t		trie;			/* trie			*/
5254887Schin 	}		re;
5264887Schin } Rex_t;
5274887Schin 
/*
 * Library-private state attached to a regex_t.  It holds the compiled
 * expression, the discipline, the subject bounds and match bookkeeping
 * for the current and best match, the compile flags, a reference count
 * shared by regcomp() and regdup(), an embedded terminal continuation
 * node (done), statistics, the case-fold map and a set of one-byte
 * mode flags.
 */
5284887Schin typedef struct reglib_s			/* library private regex_t info	*/
5294887Schin {
5304887Schin 	struct Rex_s*	rex;		/* compiled expression		*/
5314887Schin 	regdisc_t*	disc;		/* REG_DISCIPLINE discipline	*/
5324887Schin 	const regex_t*	regex;		/* from regexec			*/
5334887Schin 	unsigned char*	beg;		/* beginning of string		*/
5344887Schin 	unsigned char*	end;		/* end of string		*/
5354887Schin 	Vector_t*	pos;		/* posns of certain subpatterns	*/
5364887Schin 	Vector_t*	bestpos;	/* ditto for best match		*/
5374887Schin 	regmatch_t*	match;		/* subexprs in current match	*/
5384887Schin 	regmatch_t*	best;		/* ditto in best match yet	*/
5394887Schin 	Stk_pos_t	stk;		/* exec stack pos		*/
5404887Schin 	size_t		min;		/* minimum match length		*/
5414887Schin 	size_t		nsub;		/* internal re_nsub		*/
5424887Schin 	regflags_t	flags;		/* flags from regcomp()		*/
5434887Schin 	int		error;		/* last error			*/
5444887Schin 	int		explicit;	/* explicit match on this char	*/
5454887Schin 	int		leading;	/* leading match on this char	*/
5464887Schin 	int		refs;		/* regcomp()+regdup() references*/
5474887Schin 	Rex_t		done;		/* the last continuation	*/
5484887Schin 	regstat_t	stats;		/* for regstat()		*/
5494887Schin 	unsigned char	fold[UCHAR_MAX+1]; /* REG_ICASE map		*/
5504887Schin 	unsigned char	hard;		/* hard comp			*/
5514887Schin 	unsigned char	once;		/* if 1st parse fails, quit	*/
5524887Schin 	unsigned char	separate;	/* cannot combine		*/
5534887Schin 	unsigned char	stack;		/* hard comp or exec		*/
5544887Schin 	unsigned char	sub;		/* re_sub is valid		*/
5554887Schin 	unsigned char	test;		/* debug/test bitmask		*/
5564887Schin } Env_t;
5574887Schin 
/*
 * State shared across the library (a single instance is declared below
 * as state).  escape[] is a 52-entry table pairing a key byte with 15
 * short values; magic[] has one short pointer per unsigned char value.
 * NOTE(review): presumably magic[c] points at the val[] row of the
 * escape entry whose key is c, and attrs/names are lookup dictionaries
 * configured by dtdisc -- confirm in the initialization code.
 */
5584887Schin typedef struct State_s				/* shared state		*/
5594887Schin {
5604887Schin 	regmatch_t	nomatch;
5614887Schin 	struct
5624887Schin 	{
5634887Schin 	unsigned char	key;
5644887Schin 	short		val[15];
5654887Schin 	}		escape[52];
5664887Schin 	short*		magic[UCHAR_MAX+1];
5674887Schin 	regdisc_t	disc;
5684887Schin 	int		fatal;
5694887Schin 	int		initialized;
5704887Schin 	Dt_t*		attrs;
5714887Schin 	Dt_t*		names;
5724887Schin 	Dtdisc_t	dtdisc;
5734887Schin } State_t;
5744887Schin 
/*
 * Library-internal globals and functions.  state, alloc, classfun, drop
 * and fatal are macros defined near the top of this header, so the
 * symbols actually declared here are _reg_state, _reg_alloc,
 * _reg_classfun, _reg_drop and _reg_fatal.
 */
5754887Schin extern State_t		state;
5764887Schin 
5774887Schin extern void*		alloc(regdisc_t*, void*, size_t);
5784887Schin extern regclass_t	classfun(int);
5794887Schin extern void		drop(regdisc_t*, Rex_t*);
5804887Schin extern int		fatal(regdisc_t*, int, const char*);
5814887Schin 
5824887Schin #endif
583