/* xref: /onnv-gate/usr/src/lib/libpp/common/ppfsm.h (revision 10898:1883b621b3ea) */
/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1986-2009 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                                                                      *
***********************************************************************/
#pragma prototyped
/*
 * Glenn Fowler
 * AT&T Research
 *
 * preprocessor lexical analyzer definitions
 */

#ifndef _PPFSM_H
#define _PPFSM_H

/*
 * state value layout, as implemented by the macros below:
 * the low BITTERM+1 bits hold the terminal action (SPLICE is the
 * top bit of that field) and the BITNONTERM bits above them hold
 * a token value stored relative to N_PP
 */

#define BITSTATE	16		/* bitsof(state)		*/
#define BITNONTERM	7		/* bitsof(non-terminal-state)	*/
#define BITTERM		7		/* bitsof(terminal-state)	*/
#define NMAC		19		/* number of MAC states		*/

#define SPLICE		(1<<BITTERM)

/*
 * CODE(tok,act)  pack tok-N_PP above the action field and or in act
 * TERM(st)       the low BITTERM+1 bits of st
 * NEXT(st)       the BITNONTERM bits of st above the action field
 * QUAL(st)       st shifted above the action field, combined with S_QUAL
 * TYPE(st)       NEXT(st) with the N_PP bias restored
 */

#define	CODE(tok,act)	((((tok)-N_PP)<<(BITTERM+1))|(act))
#define TERM(st)	((st)&((1<<(BITTERM+1))-1))
#define NEXT(st)	(((st)>>(BITTERM+1))&((1<<BITNONTERM)-1))
#define QUAL(st)	(((st)<<(BITTERM+1))|(S_QUAL))
#define	TYPE(st)	(NEXT(st)+N_PP)

/*
 * CODE() shorthands using the S_TOKB and S_TOK actions
 */

#define BACK(tok)	CODE(tok,S_TOKB)
#define KEEP(tok)	CODE(tok,S_TOK)

474887Schin #undef	MAX
484887Schin #define MAX		255
494887Schin 
504887Schin #undef	EOB
514887Schin #define EOB		0
524887Schin #undef	EOF
534887Schin #define EOF		(MAX+1)
544887Schin 
/*
 * FSM states
 *
 * NOTE: preserve the ranges
 *
 * the IN*() tests compare a pointer into fsm against the rows
 * selected by state number, so each range test relies on the
 * numeric order of the states that bound it
 */

/* state number of the fsm row that p points into */
#define INDEX(p)	(((p)-fsm[0])/(MAX+1))

/* QID when x is non-negative and fsm[x] satisfies INQMACRO(), otherwise x */
#define IDSTATE(x)	(((x)>=0&&INQMACRO(fsm[x]))?QID:(x))

#define INCOMMENT(p)	((p)>=fsm[COM2]&&(p)<=fsm[COM7])	/* COM2..COM7 */
#define INCOMMENTXX(p)	((p)>=fsm[COM5]&&(p)<=fsm[COM7])	/* COM5..COM7 */
#define INQMACRO(p)	((p)>=fsm[MAC0]&&(p)<=fsm[LIT0])	/* MAC0..LIT0 */
#define INTMACRO(p)	((p)>=fsm[NID]&&(p)<=fsm[LIT])	/* NID..LIT */
#define INQUOTE(p)	((p)>=fsm[LIT1]&&(p)<=fsm[LIT2])	/* LIT1..LIT2 */
#define INOPSPACE(p)	((p)==fsm[BIN1])
#define INSPACE(p)	((p)==fsm[WS1])

/*
 * proto non-terminal states
 *
 * numbered consecutively from PROTO; the range tests INCOMMENT(),
 * INCOMMENTXX(), INTMACRO() and INQUOTE() depend on the relative
 * order of COM2..COM7, NID..LIT and LIT1..LIT2
 */

#define PROTO		0
#define RES1		(PROTO+1)
#define RES1a		(PROTO+2)
#define RES1e		(PROTO+3)
#define RES1f		(PROTO+4)
#define RES1h		(PROTO+5)
#define RES1l		(PROTO+6)
#define RES1n		(PROTO+7)
#define RES1o		(PROTO+8)
#define RES1t		(PROTO+9)
#define RES1x		(PROTO+10)
#define RES1y		(PROTO+11)
#define COM1		(PROTO+12)
#define COM2		(PROTO+13)
#define COM3		(PROTO+14)
#define COM4		(PROTO+15)
#define COM5		(PROTO+16)
#define COM6		(PROTO+17)
#define COM7		(PROTO+18)
#define NID		(PROTO+19)
#define LIT		(PROTO+20)
#define LIT1		(PROTO+21)
#define LIT2		(PROTO+22)
#define BAD1		(PROTO+23)
#define BAD2		(PROTO+24)
#define DOT		(PROTO+25)
#define DOT2		(PROTO+26)
#define WS1		(PROTO+27)

/*
 * TERMINAL is the first state number after the non-terminal states:
 * with PROTOMAIN set it follows the proto states directly, otherwise
 * the quick and tokenize non-terminal states are numbered first
 */

#if PROTOMAIN

#define TERMINAL	(PROTO+28)	/* PROTOMAIN */

#else

/*
 * quick non-terminal states
 *
 * MAC0..MACN and HIT0..HITN are two runs of NMAC states each
 */

#define QUICK		(PROTO+28)
#define QTOK		(QUICK+1)
#define QNUM		(QUICK+2)
#define QEXP		(QUICK+3)
#define QCOM		(QUICK+4)
#define QID		(QUICK+5)
#define MAC0		(QUICK+6)
#define MACN		(MAC0+NMAC-1)
#define HIT0		(MACN+1)
#define HITN		(HIT0+NMAC-1)
#define LIT0		(HITN+1)
#define SHARP1		(HITN+2)

/*
 * tokenize non-terminal states
 */

#define TOKEN		(HITN+3)
#define OCT1		(TOKEN+1)
#define OCT2		(TOKEN+2)
#define OCT3		(TOKEN+3)
#define NOT1		(TOKEN+4)
#define PCT1		(TOKEN+5)
#define AND1		(TOKEN+6)
#define STAR1		(TOKEN+7)
#define PLUS1		(TOKEN+8)
#define MINUS1		(TOKEN+9)
#define ARROW1		(TOKEN+10)
#define COLON1		(TOKEN+11)
#define LT1		(TOKEN+12)
#define LSH1		(TOKEN+13)
#define EQ1		(TOKEN+14)
#define RSH1		(TOKEN+15)
#define GT1		(TOKEN+16)
#define CIRC1		(TOKEN+17)
#define OR1		(TOKEN+18)
#define DEC1		(TOKEN+19)
#define DEC2		(TOKEN+20)
#define HEX1		(TOKEN+21)
#define HEX2		(TOKEN+22)
#define HEX3		(TOKEN+23)
#define HEX4		(TOKEN+24)
#define HEX5		(TOKEN+25)
#define HEX6		(TOKEN+26)
#define HEX7		(TOKEN+27)
#define HEX8		(TOKEN+28)
#define DBL1		(TOKEN+29)
#define DBL2		(TOKEN+30)
#define DBL3		(TOKEN+31)
#define DBL4		(TOKEN+32)
#define DBL5		(TOKEN+33)
#define DOT1		(TOKEN+34)
#define HDR1		(TOKEN+35)
#define BIN1		(TOKEN+36)

#define TERMINAL	(TOKEN+37)

#endif

/*
 * quick terminal states grouped together
 *
 * all terminal states are numbered consecutively from TERMINAL
 */

#define S_CHRB		(TERMINAL+0)
#define S_COMMENT	(TERMINAL+1)
#define S_EOB		(TERMINAL+2)
#define S_LITBEG	(TERMINAL+3)
#define S_LITEND	(TERMINAL+4)
#define S_LITESC	(TERMINAL+5)
#define S_MACRO		(TERMINAL+6)
#define S_NL		(TERMINAL+7)
#define S_QUAL		(TERMINAL+8)
#define S_SHARP		(TERMINAL+9)
#define S_VS		(TERMINAL+10)

/*
 * and the remaining terminal states
 */

#define S_CHR		(TERMINAL+11)
#define S_HUH		(TERMINAL+12)
#define S_TOK		(TERMINAL+13)
#define S_TOKB		(TERMINAL+14)
#define S_WS		(TERMINAL+15)

/* S_RESERVED is an alias for S_HUH */
#define S_RESERVED	(S_HUH)

/*
 * the last terminal state (for tracing)
 */

#define LAST		(S_WS)

/*
 * pseudo terminal states
 */

#define S_EOF		(0)

/*
 * common lex macros
 *
 * NOTE: common local variable names assumed
 */

/*
 * GET: read the next input character into c with GETCHR(); when
 * that character is EOB and the current input (pp.in) has type
 * IN_FILE, hand off to FGET() with the same arguments
 */

#define GET(p,c,tp,xp)	\
	do \
	{ \
		if ((c = GETCHR()) == EOB && pp.in->type == IN_FILE) \
			FGET(p, c, tp, xp); \
	} while (0)

/*
 * FGET: called when the input buffer is exhausted
 *
 * uses the caller's locals op and rp in addition to its arguments
 *
 *  - if op has advanced more than PPTOKSIZ past xp it is pulled
 *    back to xp+PPTOKSIZ, with a "long token truncated" warning
 *    unless rp is in a comment state or NOTEXT/SKIPCONTROL is set
 *  - if the input has IN_flush set, pp.level is 1, rp is not in a
 *    macro state, comments are not being kept or rp is not in a
 *    comment state, and the pending output ends in a newline, the
 *    output is written with PPWRITE() and op, tp and pp.outp are
 *    reset to pp.outbuf
 *  - the input is then synced, refill(p) is called, the input is
 *    re-cached and the next character is read into c; BACKIN() is
 *    applied if that character is again EOB
 *
 * NOTE: c is used as scratch for the output byte count before it
 *	 receives the next character
 * NOTE(review): INMACRO() is not defined in this header --
 *	 presumably the including source supplies it; confirm
 */

#define FGET(p,c,tp,xp)	\
	do \
	{ \
		if (op > xp + PPTOKSIZ) \
		{ \
			if (!INCOMMENT(rp) && !(pp.state & (NOTEXT|SKIPCONTROL))) \
				error(2, "long token truncated"); \
			op = xp + PPTOKSIZ; \
		} \
		if ((pp.in->flags & IN_flush) && pp.level == 1 && !INMACRO(rp) && (!pp.comment || !INCOMMENT(rp)) && (c = op - pp.outbuf) > 0 && *(op - 1) == '\n') \
		{ \
			PPWRITE(c); \
			op = tp = pp.outp = pp.outbuf; \
		} \
		SYNCIN(); \
		refill(p); \
		CACHEIN(); \
		if ((c = GETCHR()) == EOB) BACKIN(); \
	} while (0)

/*
 * POP: emit a level -7 debug trace, then make prv the current
 * input (pp.in) and reload ip from its nextchr
 *
 * uses the caller's locals cur, prv, rp and ip
 */

#define POP()		\
	do \
	{ \
		debug((-7, "POP  in=%s next=%s state=%s", ppinstr(cur), pptokchr(*prv->nextchr), pplexstr(INDEX(rp)))); \
		ip = (pp.in = prv)->nextchr; \
	} while (0)

/*
 * fsm implementation globals
 *
 * the short names used throughout this header map to _pp_ prefixed
 * external identifiers
 */

#define fsm		_pp_fsmtab
#define refill		_pp_refill
#define trigraph	_pp_trigraph

2634887Schin /*
2644887Schin  * first index is state, second is char, value is next state
2654887Schin  * except for fsm[TERMINAL] where second is state+1 for EOF transition
2664887Schin  */
2674887Schin 
2684887Schin extern short		fsm[TERMINAL+1][MAX+1];
2694887Schin 
2704887Schin /*
2714887Schin  * the index is char, value is trigraph value for <?><?><char>, 0 if invalid
2724887Schin  */
2734887Schin 
2744887Schin extern char		trigraph[MAX+1];
2754887Schin 
2764887Schin extern void		refill(int);
2774887Schin 
#endif