/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1986-2009 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                                                                      *
***********************************************************************/
204887Schin #pragma prototyped
214887Schin /*
224887Schin * Glenn Fowler
234887Schin * AT&T Research
244887Schin *
254887Schin * preprocessor and proto lexical analyzer fsm
264887Schin * define PROTOMAIN for standalone proto
274887Schin */
284887Schin
294887Schin #include "pplib.h"
304887Schin #include "ppfsm.h"
314887Schin
/*
 * lexical FSM encoding
 * derived from a standalone ansi cpp by Dennis Ritchie
 * modified for libpp by Glenn Fowler
 *
 *   fsm[] is initialized from fsminit[].  The encoding is blown out into
 *   fsm[] for time efficiency.  When in state state, and one of the
 *   characters in ch arrives, enter nextstate.  States >= TERMINAL are
 *   either final, or at least require special action.  In fsminit[] there
 *   is a line for each <state,charset,nextstate>.  Early entries are
 *   overwritten by later ones.  C_XXX is the universal set and should
 *   always be first.  Some of the fsminit[] entries are templates for
 *   groups of states.  The OP entries trigger the state copies.  States
 *   above TERMINAL are represented in fsm[] as negative values.  S_TOK and
 *   S_TOKB encode the resulting token type in the upper bits.  These actions
 *   differ in that S_TOKB has a lookahead char.
 *
 *   fsm[] has three start states:
 *
 *	PROTO	proto (ANSI -> K&R,C++,ANSI)
 *	QUICK	standalone ppcpp()
 *	TOKEN	tokenizing pplex()
 *
 *   If the next state remains the same then the fsm[] transition value is 0.
 *   MAX+1 is a power of 2 so that fsm[state][EOF==MAX+1] actually accesses
 *   fsm[state+1][0] which is ~S_EOB for all states.  This preserves the
 *   power of 2 fsm[] row size for efficient array indexing.  Thanks to
 *   D. G. Korn for the last two observations.  The pseudo non-terminal state
 *   fsm[TERMINAL][state+1] is used to differentiate EOB from EOF.
 *
 *   The bit layout is:
 *
 *	TERM	arg	SPLICE	next
 *	15	14-8	7	6-0
 */

/*
 * character class codes for fsminit[].ch[]
 *
 * ppfsm(FSM_INIT,0) expands each code instead of treating it as a literal
 * input character:
 *
 *	C_XXX	every character 0..MAX (the universal set)
 *	C_EOF	no fsm[state][] column; the next state is recorded in
 *		fsm[TERMINAL][state+1]
 *	C_LET	the characters of let[]
 *	C_HEX	the characters of hex[]
 *	C_DEC	the characters of dec[]
 *	C_OCT	the characters of oct[]
 *
 * NOTE: these must be `control' characters for all native codesets
 *	 currently ok for {ascii,ebcdic1,ebcdic2,ebcdic3}
 */

#define C_DEC		001
#define C_EOF		002
#define C_HEX		003
#define C_LET		021
#define C_OCT		022
#define C_XXX		023

/*
 * a row whose state is OP is an operation rather than a transition;
 * its nextstate selects the operation: END stops the initialization
 * scan, COPY replicates one state into a range of states
 */

#define OP		(-1)
#define END		0
#define COPY		1

/*
 * copy state f to state t
 *
 * MAX+1 entries are copied starting at column 1, so the copy covers
 * columns 1..MAX of the row plus column 0 of the following row; the
 * matching fsm[TERMINAL][] slot is copied along with it
 */

#define copy(t,f)	(memcpy(&fsm[t][1],&fsm[f][1],(MAX+1)*sizeof(short)),fsm[TERMINAL][(t)+1]=fsm[TERMINAL][(f)+1])

/*
 * one fsminit[] row
 *
 * for a transition row ch[] lists up to 4 input characters or C_*
 * class codes; a 0 entry ends the list early
 *
 * for an OP row with nextstate COPY, ch[] is { from, first, last }:
 * state `from' is copied to states first..last inclusive
 */

struct fsminit				/* fsm initialization row	*/
{
	int		state;		/* if in this state		*/
	unsigned char	ch[4];		/* and see one of these		*/
	int		nextstate;	/* enter this state if <TERMINAL*/
};

934887Schin static struct fsminit fsminit[] =
944887Schin {
954887Schin /* proto start state */
964887Schin { PROTO, { C_XXX }, S_CHR, },
974887Schin { PROTO, { C_EOF }, S_EOF, },
984887Schin { PROTO, { C_DEC }, BAD1, },
994887Schin { PROTO, { '.' }, DOT, },
1004887Schin { PROTO, { C_LET }, NID, },
1014887Schin { PROTO, { 'L' }, LIT, },
1024887Schin { PROTO, { 'd', 'e', 'f', 'i' }, RES1, },
1034887Schin { PROTO, { 'r', 's', 't', 'v' }, RES1, },
1044887Schin { PROTO, { 'w', 'N' }, RES1, },
1054887Schin { PROTO, { '"', '\'' }, S_LITBEG, },
1064887Schin { PROTO, { '/' }, COM1, },
1074887Schin { PROTO, { '\n' }, S_NL, },
1084887Schin { PROTO, { ' ','\t','\f','\v' }, WS1, },
1094887Schin
1104887Schin /* proto {do,else,extern,for,if,inline,return,static,typedef,va_start,void,while,NoN} */
1114887Schin { RES1, { C_XXX }, S_MACRO, },
1124887Schin { RES1, { C_LET, C_DEC }, NID, },
1134887Schin { RES1, { 'a' }, RES1a, },
1144887Schin { RES1, { 'e' }, RES1e, },
1154887Schin { RES1, { 'f' }, RES1f, },
1164887Schin { RES1, { 'h' }, RES1h, },
1174887Schin { RES1, { 'l' }, RES1l, },
1184887Schin { RES1, { 'n' }, RES1n, },
1194887Schin { RES1, { 'o' }, RES1o, },
1204887Schin { RES1, { 't' }, RES1t, },
1214887Schin { RES1, { 'x' }, RES1x, },
1224887Schin { RES1, { 'y' }, RES1y, },
1234887Schin
1244887Schin /* proto reserved {va_start} */
1254887Schin { RES1a, { C_XXX }, S_RESERVED, },
1264887Schin { RES1a, { C_LET, C_DEC }, NID, },
1274887Schin { RES1a, { '_','s','t','a' }, RES1a, },
1284887Schin { RES1a, { 'r' }, RES1a, },
1294887Schin
1304887Schin /* proto reserved {return} */
1314887Schin { RES1e, { C_XXX }, S_RESERVED, },
1324887Schin { RES1e, { C_LET, C_DEC }, NID, },
1334887Schin { RES1e, { 't','u','r','n' }, RES1e, },
1344887Schin
1354887Schin /* proto reserved {if} */
1364887Schin { RES1f, { C_XXX }, S_RESERVED, },
1374887Schin { RES1f, { C_LET, C_DEC }, NID, },
1384887Schin
1394887Schin /* proto reserved {while} */
1404887Schin { RES1h, { C_XXX }, S_RESERVED, },
1414887Schin { RES1h, { C_LET, C_DEC }, NID, },
1424887Schin { RES1h, { 'i','l','e' }, RES1h, },
1434887Schin
1444887Schin /* proto reserved {else} */
1454887Schin { RES1l, { C_XXX }, S_RESERVED, },
1464887Schin { RES1l, { C_LET, C_DEC }, NID, },
1474887Schin { RES1l, { 's','e' }, RES1l, },
1484887Schin
1494887Schin /* proto reserved {inline} */
1504887Schin { RES1n, { C_XXX }, S_RESERVED, },
1514887Schin { RES1n, { C_LET, C_DEC }, NID, },
1524887Schin { RES1n, { 'l','i','n','e' }, RES1n, },
1534887Schin
1544887Schin /* proto reserved {do,for,void} */
1554887Schin { RES1o, { C_XXX }, S_RESERVED, },
1564887Schin { RES1o, { C_LET, C_DEC }, NID, },
1574887Schin { RES1o, { 'r','i','d','N' }, RES1o, },
1584887Schin
1594887Schin /* proto reserved {static} */
1604887Schin { RES1t, { C_XXX }, S_RESERVED, },
1614887Schin { RES1t, { C_LET, C_DEC }, NID, },
1624887Schin { RES1t, { 'a','t','i','c' }, RES1t, },
1634887Schin
1644887Schin /* proto reserved {extern} */
1654887Schin { RES1x, { C_XXX }, S_RESERVED, },
1664887Schin { RES1x, { C_LET, C_DEC }, NID, },
1674887Schin { RES1x, { 't','e','r','n' }, RES1x, },
1684887Schin
1694887Schin /* proto reserved {typedef} */
1704887Schin { RES1y, { C_XXX }, S_RESERVED, },
1714887Schin { RES1y, { C_LET, C_DEC }, NID, },
1724887Schin { RES1y, { 'p','e','d','f' }, RES1y, },
1734887Schin
1744887Schin /* saw /, perhaps start of comment */
1754887Schin { COM1, { C_XXX }, S_CHRB, },
1764887Schin { COM1, { '*' }, COM2, },
1774887Schin #if PROTOMAIN
1784887Schin { COM1, { '/' }, COM5, },
1794887Schin #endif
1804887Schin
1814887Schin /* saw / *, start of comment */
1824887Schin { COM2, { C_XXX }, COM2, },
1834887Schin { COM2, { '\n', C_EOF }, S_COMMENT, },
1844887Schin { COM2, { '/' }, COM4, },
1854887Schin { COM2, { '*' }, COM3, },
1864887Schin { COM2, { '#', ';', ')' }, QUAL(COM2), },
1874887Schin
1884887Schin /* saw the * possibly ending a comment */
1894887Schin { COM3, { C_XXX }, COM2, },
1904887Schin { COM3, { '\n', C_EOF }, S_COMMENT, },
1914887Schin { COM3, { '#', ';', ')' }, QUAL(COM2), },
1924887Schin { COM3, { '*' }, COM3, },
1934887Schin { COM3, { '/' }, S_COMMENT, },
1944887Schin
1954887Schin /* saw / in / * comment, possible malformed nest */
1964887Schin { COM4, { C_XXX }, COM2, },
1974887Schin { COM4, { '*', '\n', C_EOF }, S_COMMENT, },
1984887Schin { COM4, { '/' }, COM4, },
1994887Schin
2004887Schin /* saw / /, start of comment */
2014887Schin { COM5, { C_XXX }, COM5, },
2024887Schin { COM5, { '\n', C_EOF }, S_COMMENT, },
2034887Schin { COM5, { '/' }, COM6, },
2044887Schin { COM5, { '*' }, COM7, },
2054887Schin
2064887Schin /* saw / in / / comment, possible malformed nest */
2074887Schin { COM6, { C_XXX }, COM5, },
2084887Schin { COM6, { '*', '\n', C_EOF }, S_COMMENT, },
2094887Schin { COM6, { '/' }, COM6, },
2104887Schin
2114887Schin /* saw * in / /, possible malformed nest */
2124887Schin { COM7, { C_XXX }, COM5, },
2134887Schin { COM7, { '\n', C_EOF }, S_COMMENT, },
2144887Schin { COM7, { '*' }, COM7, },
2154887Schin { COM7, { '/' }, S_COMMENT, },
2164887Schin
2174887Schin /* normal identifier -- always a macro candidate */
2184887Schin { NID, { C_XXX }, S_MACRO, },
2194887Schin { NID, { C_LET, C_DEC }, NID, },
2204887Schin
2214887Schin /* saw ., operator or dbl constant */
2224887Schin { DOT, { C_XXX }, S_CHRB, },
2234887Schin { DOT, { '.' }, DOT2, },
2244887Schin { DOT, { C_DEC }, BAD1, },
2254887Schin
2264887Schin /* saw .., possible ... */
2274887Schin { DOT2, { C_XXX }, BACK(T_INVALID), },
2284887Schin { DOT2, { '.' }, KEEP(T_VARIADIC), },
2294887Schin
2304887Schin /* saw L (possible start of normal wide literal) */
2314887Schin { LIT, { C_XXX }, S_MACRO, },
2324887Schin { LIT, { C_LET, C_DEC }, NID, },
2334887Schin { LIT, { '"', '\'' }, QUAL(LIT1), },
2344887Schin
2354887Schin /* saw " or ' beginning literal */
2364887Schin { LIT1, { C_XXX }, LIT1, },
2374887Schin { LIT1, { '"', '\'' }, S_LITEND, },
2384887Schin { LIT1, { '\n', C_EOF }, S_LITEND, },
2394887Schin { LIT1, { '\\' }, LIT2, },
2404887Schin
2414887Schin /* saw \ in literal */
2424887Schin { LIT2, { C_XXX }, S_LITESC, },
2434887Schin { LIT2, { '\n', C_EOF }, S_LITEND, },
2444887Schin
2454887Schin /* eat malformed numeric constant */
2464887Schin { BAD1, { C_XXX }, BACK(T_INVALID), },
2474887Schin { BAD1, { C_LET, C_DEC, '.' }, BAD1, },
2484887Schin { BAD1, { 'e', 'E' }, BAD2, },
2494887Schin
2504887Schin /* eat malformed numeric fraction|exponent */
2514887Schin { BAD2, { C_XXX }, BACK(T_INVALID), },
2524887Schin { BAD2, { C_LET, C_DEC, '.' }, BAD1, },
2534887Schin { BAD2, { '+', '-' }, BAD1, },
2544887Schin
2554887Schin /* saw white space, eat it up */
2564887Schin { WS1, { C_XXX }, S_WS, },
2574887Schin { WS1, { ' ', '\t' }, WS1, },
2584887Schin { WS1, { '\f', '\v' }, S_VS, },
2594887Schin
2604887Schin #if !PROTOMAIN
2614887Schin
2624887Schin /* quick template */
2634887Schin { QUICK, { C_XXX }, QTOK, },
2644887Schin { QUICK, { C_EOF, MARK }, S_CHRB, },
2654887Schin { QUICK, { C_LET, C_DEC }, QID, },
2664887Schin { QUICK, { 'L' }, LIT0, },
2674887Schin { QUICK, { '"', '\'' }, S_LITBEG, },
2684887Schin { QUICK, { '/' }, S_CHRB, },
2694887Schin { QUICK, { '*' }, QCOM, },
2704887Schin { QUICK, { '#' }, SHARP1, },
2714887Schin { QUICK, { '\n' }, S_NL, },
2724887Schin { QUICK, { '\f', '\v' }, S_VS, },
2734887Schin
2744887Schin /* copy QUICK to QUICK+1 through MAC0+1 */
2754887Schin { OP, {QUICK,QUICK+1,MAC0+1}, COPY, },
2764887Schin
2774887Schin /* quick start state */
2784887Schin { QUICK, { C_EOF }, S_EOF, },
2794887Schin { QUICK, { C_DEC }, QNUM, },
2804887Schin { QUICK, { MARK }, QTOK, },
2814887Schin { QUICK, { '/' }, COM1, },
2824887Schin { QUICK, { ' ', '\t' }, QUICK, },
2834887Schin
2844887Schin /* grab non-macro tokens */
2854887Schin { QTOK, { C_DEC }, QNUM, },
2864887Schin
2874887Schin /* grab numeric and invalid tokens */
2884887Schin { QNUM, { C_LET, C_DEC, '.' }, QNUM, },
2894887Schin { QNUM, { 'e', 'E' }, QEXP, },
2904887Schin
2914887Schin /* grab exponent token */
2924887Schin { QEXP, { C_LET, C_DEC, '.' }, QNUM, },
2934887Schin { QEXP, { '+', '-' }, QNUM, },
2944887Schin
2954887Schin /* saw *, grab possible bad comment terminator */
2964887Schin { QCOM, { C_DEC }, QNUM, },
2974887Schin { QCOM, { '/' }, S_COMMENT, },
2984887Schin
2994887Schin /* saw L (possible start of wide string or first macro char) */
3004887Schin { MAC0, { 'L' }, QID, },
3014887Schin { MAC0, { '"', '\'' }, QUAL(LIT1), },
3024887Schin
3034887Schin /* macro candidate template */
3044887Schin { MAC0+1, { 'L' }, QID, },
3054887Schin
3064887Schin /* copy MAC0+1 to MAC0+2 through MACN */
3074887Schin { OP, {MAC0+1,MAC0+2,MACN}, COPY },
3084887Schin
3094887Schin /* saw L (possible start of wide string or macro L) */
3104887Schin { HIT0, { C_XXX }, S_MACRO, },
3114887Schin { HIT0, { C_LET, C_DEC }, QID, },
3124887Schin { HIT0, { '"', '\'' }, QUAL(LIT1), },
3134887Schin
3144887Schin /* macro hit template */
3154887Schin { HIT0+1, { C_XXX }, S_MACRO, },
3164887Schin { HIT0+1, { C_LET, C_DEC }, QID, },
3174887Schin
3184887Schin /* copy HIT0+1 to HIT0+2 through HITN */
3194887Schin { OP, {HIT0+1,HIT0+2,HITN}, COPY },
3204887Schin
3214887Schin /* saw L (possible start of wide literal) */
3224887Schin { LIT0, { C_XXX }, S_MACRO, },
3234887Schin { LIT0, { C_LET, C_DEC }, QID, },
3244887Schin { LIT0, { '"', '\'' }, QUAL(LIT1), },
3254887Schin
3264887Schin /* (!PROTOMAIN COM1) saw /, perhaps start of comment or /= */
3274887Schin { COM1, { '=' }, KEEP(T_DIVEQ), },
3284887Schin
3294887Schin /* normal start state */
3304887Schin { TOKEN, { C_XXX }, S_HUH, },
3314887Schin { TOKEN, { C_EOF }, S_EOF, },
3324887Schin { TOKEN, { C_DEC }, DEC1, },
3334887Schin { TOKEN, { '0' }, OCT1, },
3344887Schin { TOKEN, { '.' }, DOT1, },
3354887Schin { TOKEN, { C_LET }, NID, },
3364887Schin { TOKEN, { 'L' }, LIT, },
3374887Schin { TOKEN, { '"', '\'', '<' }, S_LITBEG, },
3384887Schin { TOKEN, { '/' }, COM1, },
3394887Schin { TOKEN, { '\n' }, S_NL, },
3404887Schin { TOKEN, { ' ', '\t' }, WS1, },
3414887Schin { TOKEN, { '\f', '\v' }, S_VS, },
3424887Schin { TOKEN, { '#' }, SHARP1, },
3434887Schin { TOKEN, { ':' }, COLON1, },
3444887Schin { TOKEN, { '%' }, PCT1, },
3454887Schin { TOKEN, { '&' }, AND1, },
3464887Schin { TOKEN, { '*' }, STAR1, },
3474887Schin { TOKEN, { '+' }, PLUS1, },
3484887Schin { TOKEN, { '-' }, MINUS1, },
3494887Schin { TOKEN, { '=' }, EQ1, },
3504887Schin { TOKEN, { '!' }, NOT1, },
3514887Schin { TOKEN, { '>' }, GT1, },
3524887Schin { TOKEN, { '^' }, CIRC1, },
3534887Schin { TOKEN, { '|' }, OR1, },
3544887Schin { TOKEN, { '(', ')', '[', ']' }, S_CHR, },
3554887Schin { TOKEN, { '{', '}', ',', ';' }, S_CHR, },
3564887Schin { TOKEN, { '~', '?' }, S_CHR, },
3574887Schin
3584887Schin /* saw 0, possible oct|hex|dec|dbl constant */
3594887Schin { OCT1, { C_XXX }, BACK(T_DECIMAL), },
3604887Schin { OCT1, { C_LET, C_DEC }, BAD1, },
3614887Schin { OCT1, { C_OCT }, OCT2, },
3624887Schin { OCT1, { 'e', 'E' }, DBL2, },
3634887Schin { OCT1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
3644887Schin { OCT1, { 'x', 'X' }, HEX1, },
3654887Schin { OCT1, { '.' }, DBL1, },
3664887Schin
3674887Schin /* saw 0<oct>, oct constant */
3684887Schin { OCT2, { C_XXX }, BACK(T_OCTAL), },
3694887Schin { OCT2, { C_LET, C_DEC }, BAD1, },
3704887Schin { OCT2, { C_OCT }, OCT2, },
3714887Schin { OCT2, { 'e', 'E' }, DBL2, },
3724887Schin { OCT2, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), },
3734887Schin { OCT2, { '.' }, DBL1, },
3744887Schin
3754887Schin /* oct constant qualifier */
3764887Schin { OCT3, { C_XXX }, BACK(T_OCTAL), },
3774887Schin { OCT3, { C_LET, C_DEC, '.' }, BAD1, },
3784887Schin { OCT3, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), },
3794887Schin
3804887Schin /* saw 0 [xX], hex constant */
3814887Schin { HEX1, { C_XXX }, BACK(T_HEXADECIMAL), },
3824887Schin { HEX1, { C_LET }, BAD1, },
3834887Schin { HEX1, { C_HEX }, HEX1, },
3844887Schin { HEX1, { 'e', 'E' }, HEX3, },
3854887Schin { HEX1, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
3864887Schin { HEX1, { '.' }, HEX4, },
3874887Schin { HEX1, { 'p', 'P' }, HEX5, },
3884887Schin
3894887Schin /* hex constant qualifier */
3904887Schin { HEX2, { C_XXX }, BACK(T_HEXADECIMAL), },
3914887Schin { HEX2, { C_LET, C_DEC, '.' }, BAD1, },
3924887Schin { HEX2, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
3934887Schin
3944887Schin /* hex [eE][-+] botch */
3954887Schin { HEX3, { C_XXX }, BACK(T_HEXADECIMAL), },
3964887Schin { HEX3, { C_LET, '.', '-', '+'},BAD1, },
3974887Schin { HEX3, { C_HEX }, HEX1, },
3984887Schin { HEX3, { 'e', 'E' }, HEX3, },
3994887Schin { HEX3, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
4004887Schin
4014887Schin /* hex dbl fraction */
4024887Schin { HEX4, { C_XXX }, BACK(T_HEXDOUBLE), },
4034887Schin { HEX4, { C_LET, '.' }, BAD1, },
4044887Schin { HEX4, { C_HEX }, HEX4, },
4054887Schin { HEX4, { 'p', 'P' }, HEX5, },
4064887Schin { HEX4, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
4074887Schin
4084887Schin /* optional hex dbl exponent sign */
4094887Schin { HEX5, { C_XXX }, BACK(T_INVALID), },
4104887Schin { HEX5, { C_LET, '.' }, BAD1, },
4114887Schin { HEX5, { '+', '-' }, HEX6, },
4124887Schin { HEX5, { C_DEC }, HEX7, },
4134887Schin
4144887Schin /* mandatory hex dbl exponent first digit */
4154887Schin { HEX6, { C_XXX }, BACK(T_INVALID), },
4164887Schin { HEX6, { C_LET, '.' }, BAD1, },
4174887Schin { HEX6, { C_DEC }, HEX7, },
4184887Schin
4194887Schin /* hex dbl exponent digits */
4204887Schin { HEX7, { C_XXX }, BACK(T_HEXDOUBLE), },
4214887Schin { HEX7, { C_LET, '.' }, BAD1, },
4224887Schin { HEX7, { C_DEC }, HEX7, },
4234887Schin { HEX7, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
4244887Schin
4254887Schin /* hex dbl constant qualifier */
4264887Schin { HEX8, { C_XXX }, BACK(T_HEXDOUBLE), },
4274887Schin { HEX8, { C_LET, '.' }, BAD1, },
4284887Schin { HEX8, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
4294887Schin
4304887Schin /* saw <dec>, dec constant */
4314887Schin { DEC1, { C_XXX }, BACK(T_DECIMAL), },
4324887Schin { DEC1, { C_LET }, BAD1, },
4334887Schin { DEC1, { C_DEC }, DEC1, },
4344887Schin { DEC1, { 'e', 'E' }, DBL2, },
4354887Schin { DEC1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
4364887Schin { DEC1, { '.' }, DBL1, },
4374887Schin
4384887Schin /* dec constant qualifier */
4394887Schin { DEC2, { C_XXX }, BACK(T_DECIMAL), },
4404887Schin { DEC2, { C_LET, C_DEC }, BAD1, },
4414887Schin { DEC2, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
4424887Schin
4434887Schin /* saw ., operator or dbl constant */
4444887Schin { DOT1, { C_XXX }, S_CHRB, },
4454887Schin { DOT1, { '.' }, DOT2, },
4464887Schin { DOT1, { C_DEC }, DBL1, },
4474887Schin
4484887Schin /* dbl fraction */
4494887Schin { DBL1, { C_XXX }, BACK(T_DOUBLE), },
4504887Schin { DBL1, { C_LET, '.' }, BAD1, },
4514887Schin { DBL1, { C_DEC }, DBL1, },
4524887Schin { DBL1, { 'e', 'E' }, DBL2, },
4534887Schin { DBL1, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
4544887Schin
4554887Schin /* optional dbl exponent sign */
4564887Schin { DBL2, { C_XXX }, BACK(T_INVALID), },
4574887Schin { DBL2, { C_LET, '.' }, BAD1, },
4584887Schin { DBL2, { '+', '-' }, DBL3, },
4594887Schin { DBL2, { C_DEC }, DBL4, },
4604887Schin
4614887Schin /* mandatory dbl exponent first digit */
4624887Schin { DBL3, { C_XXX }, BACK(T_INVALID), },
4634887Schin { DBL3, { C_LET, '.' }, BAD1, },
4644887Schin { DBL3, { C_DEC }, DBL4, },
4654887Schin
4664887Schin /* dbl exponent digits */
4674887Schin { DBL4, { C_XXX }, BACK(T_DOUBLE), },
4684887Schin { DBL4, { C_LET, '.' }, BAD1, },
4694887Schin { DBL4, { C_DEC }, DBL4, },
4704887Schin { DBL4, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
4714887Schin
4724887Schin /* dbl constant qualifier */
4734887Schin { DBL5, { C_XXX }, BACK(T_DOUBLE), },
4744887Schin { DBL5, { C_LET, '.' }, BAD1, },
4754887Schin { DBL5, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
4764887Schin
4774887Schin /* saw < starting include header */
4784887Schin { HDR1, { C_XXX }, HDR1, },
4794887Schin { HDR1, { '>', '\n', C_EOF }, S_LITEND, },
4804887Schin
4814887Schin /* saw <binop><space> expecting = */
4824887Schin { BIN1, { C_XXX }, S_HUH, },
4834887Schin { BIN1, { ' ', '\t' }, BIN1, },
4844887Schin
4854887Schin /* 2-char ops */
4864887Schin
4874887Schin { SHARP1, { C_XXX }, S_SHARP, },
4884887Schin
4894887Schin { PCT1, { C_XXX }, S_CHRB, },
4904887Schin { PCT1, { '=' }, KEEP(T_MODEQ), },
4914887Schin
4924887Schin { AND1, { C_XXX }, S_CHRB, },
4934887Schin { AND1, { '=' }, KEEP(T_ANDEQ), },
4944887Schin { AND1, { '&' }, KEEP(T_ANDAND), },
4954887Schin
4964887Schin { STAR1, { C_XXX }, S_CHRB, },
4974887Schin { STAR1, { '=' }, KEEP(T_MPYEQ), },
4984887Schin { STAR1, { '/' }, S_COMMENT, },
4994887Schin
5004887Schin { PLUS1, { C_XXX }, S_CHRB, },
5014887Schin { PLUS1, { '=' }, KEEP(T_ADDEQ), },
5024887Schin { PLUS1, { '+' }, KEEP(T_ADDADD), },
5034887Schin
5044887Schin { MINUS1, { C_XXX }, S_CHRB, },
5054887Schin { MINUS1, { '=' }, KEEP(T_SUBEQ), },
5064887Schin { MINUS1, { '-' }, KEEP(T_SUBSUB), },
5074887Schin { MINUS1, { '>' }, KEEP(T_PTRMEM), },
5084887Schin
5094887Schin { COLON1, { C_XXX }, S_CHRB, },
5104887Schin { COLON1, { '=', '>' }, S_HUH, },
5114887Schin
5124887Schin { LT1, { C_XXX }, S_CHRB, },
5134887Schin { LT1, { '=' }, KEEP(T_LE), },
5144887Schin { LT1, { '<' }, LSH1, },
5154887Schin
5164887Schin { EQ1, { C_XXX }, S_CHRB, },
5174887Schin { EQ1, { '=' }, KEEP(T_EQ), },
5184887Schin
5194887Schin { NOT1, { C_XXX }, S_CHRB, },
5204887Schin { NOT1, { '=' }, KEEP(T_NE), },
5214887Schin
5224887Schin { GT1, { C_XXX }, S_CHRB, },
5234887Schin { GT1, { '=' }, KEEP(T_GE), },
5244887Schin { GT1, { '>' }, RSH1, },
5254887Schin
5264887Schin { CIRC1, { C_XXX }, S_CHRB, },
5274887Schin { CIRC1, { '=' }, KEEP(T_XOREQ), },
5284887Schin
5294887Schin { OR1, { C_XXX }, S_CHRB, },
5304887Schin { OR1, { '=' }, KEEP(T_OREQ), },
5314887Schin { OR1, { '|' }, KEEP(T_OROR), },
5324887Schin
5334887Schin /* 3-char ops */
5344887Schin
5354887Schin { ARROW1, { C_XXX }, BACK(T_PTRMEM), },
5364887Schin { ARROW1, { '*' }, KEEP(T_PTRMEMREF), },
5374887Schin
5384887Schin { LSH1, { C_XXX }, BACK(T_LSHIFT), },
5394887Schin { LSH1, { '=' }, KEEP(T_LSHIFTEQ), },
5404887Schin
5414887Schin { RSH1, { C_XXX }, BACK(T_RSHIFT), },
5424887Schin { RSH1, { '=' }, KEEP(T_RSHIFTEQ), },
5434887Schin
5444887Schin #endif
5454887Schin
5464887Schin /* end */
5474887Schin { OP, { 0 }, END, }
5484887Schin };
5494887Schin
5504887Schin short fsm[TERMINAL+1][MAX+1];
5514887Schin
5524887Schin char trigraph[MAX+1];
5534887Schin
5544887Schin #if PROTOMAIN
5554887Schin static char spl[] = { '\\', '\r', 0 };
5564887Schin static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$@";
5574887Schin #else
5584887Schin static char spl[] = { MARK, '?', '\\', '\r', CC_sub, 0 };
5594887Schin static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_";
5604887Schin #endif
5614887Schin static char* let = &aln[10];
5624887Schin static char hex[] = "fedcbaFEDCBA9876543210";
5634887Schin static char* dec = &hex[12];
5644887Schin static char* oct = &hex[14];
5654887Schin
5664887Schin /*
5674887Schin * runtime FSM modifications
5684887Schin * ppfsm(FSM_INIT,0) must be called first
5694887Schin */
5704887Schin
5714887Schin void
ppfsm(int op,register char * s)5724887Schin ppfsm(int op, register char* s)
5734887Schin {
5744887Schin register int c;
5754887Schin register int n;
5764887Schin register int i;
5774887Schin register short* rp;
5784887Schin register struct fsminit* fp;
5794887Schin #if !PROTOMAIN
5804887Schin char* t;
5814887Schin int x;
5824887Schin #endif
5834887Schin
5844887Schin switch (op)
5854887Schin {
5864887Schin
5874887Schin #if !PROTOMAIN
5884887Schin
5894887Schin case FSM_IDADD:
5904887Schin while (c = *s++)
5914887Schin if (!ppisid(c))
5924887Schin {
5934887Schin if (fsm[TOKEN][c] == ~S_HUH)
5944887Schin {
5954887Schin setid(c);
5964887Schin for (i = 0; i < TERMINAL; i++)
5974887Schin fsm[i][c] = IDSTATE(fsm[i]['_']);
5984887Schin }
5994887Schin else error(2, "%c: cannot add to identifier set", c);
6004887Schin }
6014887Schin break;
6024887Schin
6034887Schin case FSM_IDDEL:
6044887Schin while (c = *s++)
6054887Schin if (ppisid(c))
6064887Schin {
6074887Schin clrid(c);
6084887Schin for (i = 0; i < TERMINAL; i++)
6094887Schin fsm[i][c] = ~S_HUH;
6104887Schin }
6114887Schin break;
6124887Schin
6134887Schin #endif
6144887Schin
6154887Schin case FSM_INIT:
6164887Schin for (fp = fsminit;; fp++)
6174887Schin {
6184887Schin if ((n = fp->nextstate) >= TERMINAL) n = ~n;
6194887Schin if (fp->state == OP)
6204887Schin {
6214887Schin #if !PROTOMAIN
6224887Schin switch (n)
6234887Schin {
6244887Schin case COPY:
6254887Schin c = fp->ch[0];
6264887Schin n = fp->ch[2];
6274887Schin for (i = fp->ch[1]; i <= n; i++)
6284887Schin copy(i, c);
6294887Schin continue;
6304887Schin default:
6314887Schin break;
6324887Schin }
6334887Schin #endif
6344887Schin break;
6354887Schin }
6364887Schin rp = fsm[fp->state];
6374887Schin for (i = 0; i < sizeof(fp->ch) && (c = fp->ch[i]); i++)
6384887Schin {
6394887Schin switch (c)
6404887Schin {
6414887Schin case C_XXX:
6424887Schin for (c = 0; c <= MAX; c++)
6434887Schin rp[c] = n;
6444887Schin /*FALLTHROUGH*/
6454887Schin
6464887Schin case C_EOF:
6474887Schin fsm[TERMINAL][fp->state+1] = n < 0 ? ~n : n;
6484887Schin continue;
6494887Schin
6504887Schin case C_LET:
6514887Schin s = let;
6524887Schin break;
6534887Schin
6544887Schin case C_HEX:
6554887Schin s = hex;
6564887Schin break;
6574887Schin
6584887Schin case C_DEC:
6594887Schin s = dec;
6604887Schin break;
6614887Schin
6624887Schin case C_OCT:
6634887Schin s = oct;
6644887Schin break;
6654887Schin
6664887Schin default:
6674887Schin rp[c] = n;
6684887Schin continue;
6694887Schin }
6704887Schin while (c = *s++)
6714887Schin rp[c] = n;
6724887Schin }
6734887Schin }
6744887Schin
6754887Schin /*
6764887Schin * install splice special cases
6774887Schin * and same non-terminal transitions
6784887Schin */
6794887Schin
6804887Schin for (i = 0; i < TERMINAL; i++)
6814887Schin {
6824887Schin rp = fsm[i];
6834887Schin s = spl;
6844887Schin while (c = *s++)
6854887Schin if (c != MARK || !INCOMMENT(rp))
6864887Schin {
6874887Schin if (rp[c] >= 0) rp[c] = ~rp[c];
6884887Schin rp[c] &= ~SPLICE;
6894887Schin }
6904887Schin rp[EOB] = ~S_EOB;
6914887Schin for (c = 0; c <= MAX; c++)
6924887Schin if (rp[c] == i)
6934887Schin rp[c] = 0;
6944887Schin }
6954887Schin fsm[TERMINAL][0] = ~S_EOB;
6964887Schin
6974887Schin #if !PROTOMAIN
6984887Schin
6994887Schin /*
7004887Schin * default character types
7014887Schin */
7024887Schin
7034887Schin s = let;
7044887Schin while (c = *s++)
7054887Schin setid(c);
7064887Schin s = dec;
7074887Schin while (c = *s++)
7084887Schin setdig(c);
7094887Schin s = spl;
7104887Schin do setsplice(c = *s++); while (c);
7114887Schin
7124887Schin /*
7134887Schin * trigraph map
7144887Schin */
7154887Schin
7164887Schin trigraph['='] = '#';
7174887Schin trigraph['('] = '[';
7184887Schin trigraph['/'] = '\\';
7194887Schin trigraph[')'] = ']';
7204887Schin trigraph['\''] = '^';
7214887Schin trigraph['<'] = '{';
7224887Schin trigraph['!'] = '|';
7234887Schin trigraph['>'] = '}';
7244887Schin trigraph['-'] = '~';
7254887Schin #endif
7264887Schin break;
7274887Schin
7284887Schin #if !PROTOMAIN
7294887Schin
7304887Schin case FSM_PLUSPLUS:
7314887Schin if (pp.option & PLUSPLUS)
7324887Schin {
7334887Schin fsm[COLON1][':'] = ~KEEP(T_SCOPE);
7344887Schin fsm[DOT1]['*'] = ~KEEP(T_DOTREF);
7354887Schin fsm[MINUS1]['>'] = ARROW1;
7364887Schin fsm[COM1]['/'] = COM5;
7374887Schin t = "%<:";
7384887Schin for (i = 0; i < TERMINAL; i++)
7394887Schin {
7404887Schin rp = fsm[i];
7414887Schin if (!INCOMMENT(rp) && !INQUOTE(rp))
7424887Schin {
7434887Schin s = t;
7444887Schin while (c = *s++)
7454887Schin {
7464887Schin if (rp[c] > 0) rp[c] = ~rp[c];
7474887Schin else if (!rp[c]) rp[c] = ~i;
7484887Schin rp[c] &= ~SPLICE;
7494887Schin }
7504887Schin }
7514887Schin }
7524887Schin s = t;
7534887Schin while (c = *s++) setsplice(c);
7544887Schin }
7554887Schin else
7564887Schin {
7574887Schin fsm[COLON1][':'] = ~S_CHRB;
7584887Schin fsm[DOT1]['*'] = ~S_CHRB;
7594887Schin fsm[MINUS1]['>'] = ~KEEP(T_PTRMEM);
7604887Schin fsm[COM1]['/'] = (pp.option & PLUSCOMMENT) ? COM5 : ~S_CHRB;
7614887Schin }
7624887Schin break;
7634887Schin
7644887Schin #if COMPATIBLE
7654887Schin
7664887Schin case FSM_COMPATIBILITY:
7674887Schin if (pp.state & COMPATIBILITY)
7684887Schin {
7694887Schin fsm[HEX1]['e'] = HEX1;
7704887Schin fsm[HEX1]['E'] = HEX1;
7714887Schin fsm[QNUM]['e'] = QNUM;
7724887Schin fsm[QNUM]['E'] = QNUM;
7734887Schin fsm[QNUM]['u'] = ~QUAL(QNUM);
7744887Schin fsm[QNUM]['U'] = ~QUAL(QNUM);
7754887Schin }
7764887Schin else
7774887Schin {
7784887Schin fsm[HEX1]['e'] = HEX3;
7794887Schin fsm[HEX1]['E'] = HEX3;
7804887Schin fsm[QNUM]['e'] = QEXP;
7814887Schin fsm[QNUM]['E'] = QEXP;
7824887Schin fsm[QNUM]['u'] = QNUM;
7834887Schin fsm[QNUM]['U'] = QNUM;
7844887Schin }
7854887Schin break;
7864887Schin
7874887Schin #endif
7884887Schin
7894887Schin case FSM_QUOTADD:
7904887Schin while (c = *s++)
7914887Schin if (fsm[TOKEN][c] == ~S_HUH)
7924887Schin for (i = 0; i < TERMINAL; i++)
7934887Schin fsm[i][c] = fsm[i]['"'];
7944887Schin else error(2, "%c: cannot add to quote set", c);
7954887Schin break;
7964887Schin
7974887Schin case FSM_QUOTDEL:
7984887Schin while (c = *s++)
7994887Schin if (c != '"' && fsm[TOKEN][c] == fsm[TOKEN]['"'])
8004887Schin for (i = 0; i < TERMINAL; i++)
8014887Schin fsm[i][c] = fsm[i]['_'];
8024887Schin break;
8034887Schin
8044887Schin case FSM_OPSPACE:
8054887Schin n = s ? BIN1 : ~S_CHRB;
8064887Schin fsm[COM1][' '] = fsm[COM1]['\t'] = n;
8074887Schin fsm[AND1][' '] = fsm[AND1]['\t'] = n;
8084887Schin fsm[STAR1][' '] = fsm[STAR1]['\t'] = n;
8094887Schin fsm[PCT1][' '] = fsm[PCT1]['\t'] = n;
8104887Schin fsm[PLUS1][' '] = fsm[PLUS1]['\t'] = n;
8114887Schin fsm[MINUS1][' '] = fsm[MINUS1]['\t'] = n;
8124887Schin fsm[CIRC1][' '] = fsm[CIRC1]['\t'] = n;
8134887Schin fsm[OR1][' '] = fsm[OR1]['\t'] = n;
8144887Schin fsm[LSH1][' '] = fsm[LSH1]['\t'] = s ? BIN1 : ~BACK(T_LSHIFT);
8154887Schin fsm[RSH1][' '] = fsm[RSH1]['\t'] = s ? BIN1 : ~BACK(T_RSHIFT);
8164887Schin break;
8174887Schin
8184887Schin case FSM_MACRO:
8194887Schin if (pp.truncate && strlen(s) >= pp.truncate)
8204887Schin {
8214887Schin x = s[pp.truncate];
8224887Schin s[pp.truncate] = 0;
8234887Schin }
8244887Schin else x = -1;
8254887Schin i = MAC0 + ((c = *s++) != 'L');
8264887Schin if ((n = fsm[QUICK][c]) != (i + NMAC))
8274887Schin {
8284887Schin n = i;
8294887Schin if (!*s) n += NMAC;
8304887Schin }
8314887Schin if (fsm[QUICK][c] != n)
8324887Schin fsm[QUICK][c] = fsm[QCOM][c] = fsm[QTOK][c] = n;
8334887Schin if (c = *s++)
8344887Schin {
8354887Schin for (;;)
8364887Schin {
8374887Schin if ((i = n) < HIT0)
8384887Schin {
8394887Schin if (n < MACN) n++;
8404887Schin if (!*s)
8414887Schin {
8424887Schin n += NMAC;
8434887Schin break;
8444887Schin }
8454887Schin if (fsm[i][c] < HIT0)
8464887Schin fsm[i][c] = n;
8474887Schin if (fsm[i + NMAC][c] < HIT0)
8484887Schin fsm[i + NMAC][c] = n;
8494887Schin }
8504887Schin else
8514887Schin {
8524887Schin if (n < HITN) n++;
8534887Schin if (!*s) break;
8544887Schin if (fsm[i][c] < HIT0)
8554887Schin {
8564887Schin n -= NMAC;
8574887Schin fsm[i][c] = n;
8584887Schin }
8594887Schin }
8604887Schin c = *s++;
8614887Schin }
8624887Schin if (x >= 0)
8634887Schin {
8644887Schin *s = x;
8654887Schin for (n = CHAR_MIN; n <= CHAR_MAX; n++)
8664887Schin if (ppisidig(n))
8674887Schin fsm[HITN][n] = HITN;
8684887Schin n = HITN;
8694887Schin }
8704887Schin if (fsm[i][c] < n)
8714887Schin fsm[i][c] = n;
8724887Schin if (i < HIT0 && fsm[i + NMAC][c] < n)
8734887Schin fsm[i + NMAC][c] = n;
8744887Schin }
8754887Schin break;
8764887Schin
8774887Schin #endif
8784887Schin
8794887Schin }
8804887Schin }
8814887Schin
8824887Schin #if !PROTOMAIN
8834887Schin
8844887Schin /*
8854887Schin * file buffer refill
8864887Schin * c is current input char
8874887Schin */
8884887Schin
8894887Schin void
refill(register int c)8904887Schin refill(register int c)
8914887Schin {
8924887Schin if (pp.in->flags & IN_eof)
8934887Schin {
8944887Schin pp.in->nextchr--;
8954887Schin c = 0;
8964887Schin }
8974887Schin else
8984887Schin {
8994887Schin *((pp.in->nextchr = pp.in->buffer + PPBAKSIZ) - 1) = c;
9004887Schin c =
9014887Schin #if PROTOTYPE
9024887Schin (pp.in->flags & IN_prototype) ? pppread(pp.in->nextchr) :
9034887Schin #endif
9044887Schin read(pp.in->fd, pp.in->nextchr, PPBUFSIZ);
9054887Schin }
9064887Schin if (c > 0)
9074887Schin {
9084887Schin if (pp.in->nextchr[c - 1] == '\n') pp.in->flags |= IN_newline;
9094887Schin else pp.in->flags &= ~IN_newline;
9104887Schin #if PROTOTYPE
9114887Schin if (!(pp.in->flags & IN_prototype))
9124887Schin #endif
9134887Schin if (c < PPBUFSIZ && (pp.in->flags & IN_regular))
9144887Schin {
9154887Schin pp.in->flags |= IN_eof;
9164887Schin close(pp.in->fd);
9174887Schin pp.in->fd = -1;
9184887Schin }
9194887Schin }
9204887Schin else
9214887Schin {
9224887Schin if (c < 0)
9234887Schin {
9244887Schin error(ERROR_SYSTEM|3, "read error");
9254887Schin c = 0;
9264887Schin }
9274887Schin else if ((pp.in->flags ^ pp.in->prev->flags) & IN_c)
9284887Schin {
9294887Schin static char ket[] = { 0, '}', '\n', 0 };
9304887Schin
9314887Schin pp.in->flags ^= IN_c;
9324887Schin pp.in->nextchr = ket + 1;
9334887Schin c = 2;
9344887Schin }
9354887Schin pp.in->flags |= IN_eof;
9364887Schin }
9374887Schin #if CHECKPOINT
9384887Schin pp.in->buflen = c;
9394887Schin #endif
9404887Schin pp.in->nextchr[c] = 0;
9414887Schin debug((-7, "refill(\"%s\") = %d = \"%-.*s%s\"", error_info.file, c, (c > 32 ? 32 : c), pp.in->nextchr, c > 32 ? "..." : ""));
9424887Schin if (pp.test & 0x0080)
9434887Schin sfprintf(sfstderr, "===== refill(\"%s\") = %d =====\n%s\n===== eob(\"%s\") =====\n", error_info.file, c, pp.in->nextchr, error_info.file);
9444887Schin }
9454887Schin
9464887Schin #endif
947