xref: /minix3/usr.bin/indent/lexi.c (revision 97c7d358ea5970ca59196ed90e975115245f9e76)
1*97c7d358SVivek Prakash /*	$NetBSD: lexi.c,v 1.13 2009/04/12 11:09:49 lukem Exp $	*/
2*97c7d358SVivek Prakash 
3*97c7d358SVivek Prakash /*
4*97c7d358SVivek Prakash  * Copyright (c) 1980, 1993
5*97c7d358SVivek Prakash  *	The Regents of the University of California.  All rights reserved.
6*97c7d358SVivek Prakash  *
7*97c7d358SVivek Prakash  * Redistribution and use in source and binary forms, with or without
8*97c7d358SVivek Prakash  * modification, are permitted provided that the following conditions
9*97c7d358SVivek Prakash  * are met:
10*97c7d358SVivek Prakash  * 1. Redistributions of source code must retain the above copyright
11*97c7d358SVivek Prakash  *    notice, this list of conditions and the following disclaimer.
12*97c7d358SVivek Prakash  * 2. Redistributions in binary form must reproduce the above copyright
13*97c7d358SVivek Prakash  *    notice, this list of conditions and the following disclaimer in the
14*97c7d358SVivek Prakash  *    documentation and/or other materials provided with the distribution.
15*97c7d358SVivek Prakash  * 3. Neither the name of the University nor the names of its contributors
16*97c7d358SVivek Prakash  *    may be used to endorse or promote products derived from this software
17*97c7d358SVivek Prakash  *    without specific prior written permission.
18*97c7d358SVivek Prakash  *
19*97c7d358SVivek Prakash  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20*97c7d358SVivek Prakash  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21*97c7d358SVivek Prakash  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22*97c7d358SVivek Prakash  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23*97c7d358SVivek Prakash  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24*97c7d358SVivek Prakash  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25*97c7d358SVivek Prakash  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26*97c7d358SVivek Prakash  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27*97c7d358SVivek Prakash  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28*97c7d358SVivek Prakash  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29*97c7d358SVivek Prakash  * SUCH DAMAGE.
30*97c7d358SVivek Prakash  */
31*97c7d358SVivek Prakash 
32*97c7d358SVivek Prakash /*
33*97c7d358SVivek Prakash  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
34*97c7d358SVivek Prakash  * Copyright (c) 1985 Sun Microsystems, Inc.
35*97c7d358SVivek Prakash  * All rights reserved.
36*97c7d358SVivek Prakash  *
37*97c7d358SVivek Prakash  * Redistribution and use in source and binary forms, with or without
38*97c7d358SVivek Prakash  * modification, are permitted provided that the following conditions
39*97c7d358SVivek Prakash  * are met:
40*97c7d358SVivek Prakash  * 1. Redistributions of source code must retain the above copyright
41*97c7d358SVivek Prakash  *    notice, this list of conditions and the following disclaimer.
42*97c7d358SVivek Prakash  * 2. Redistributions in binary form must reproduce the above copyright
43*97c7d358SVivek Prakash  *    notice, this list of conditions and the following disclaimer in the
44*97c7d358SVivek Prakash  *    documentation and/or other materials provided with the distribution.
45*97c7d358SVivek Prakash  * 3. All advertising materials mentioning features or use of this software
46*97c7d358SVivek Prakash  *    must display the following acknowledgement:
47*97c7d358SVivek Prakash  *	This product includes software developed by the University of
48*97c7d358SVivek Prakash  *	California, Berkeley and its contributors.
49*97c7d358SVivek Prakash  * 4. Neither the name of the University nor the names of its contributors
50*97c7d358SVivek Prakash  *    may be used to endorse or promote products derived from this software
51*97c7d358SVivek Prakash  *    without specific prior written permission.
52*97c7d358SVivek Prakash  *
53*97c7d358SVivek Prakash  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54*97c7d358SVivek Prakash  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55*97c7d358SVivek Prakash  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56*97c7d358SVivek Prakash  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57*97c7d358SVivek Prakash  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58*97c7d358SVivek Prakash  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59*97c7d358SVivek Prakash  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60*97c7d358SVivek Prakash  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61*97c7d358SVivek Prakash  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62*97c7d358SVivek Prakash  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63*97c7d358SVivek Prakash  * SUCH DAMAGE.
64*97c7d358SVivek Prakash  */
65*97c7d358SVivek Prakash 
66*97c7d358SVivek Prakash #include <sys/cdefs.h>
67*97c7d358SVivek Prakash #ifndef lint
68*97c7d358SVivek Prakash #if 0
69*97c7d358SVivek Prakash static char sccsid[] = "@(#)lexi.c	8.1 (Berkeley) 6/6/93";
70*97c7d358SVivek Prakash #else
71*97c7d358SVivek Prakash __RCSID("$NetBSD: lexi.c,v 1.13 2009/04/12 11:09:49 lukem Exp $");
72*97c7d358SVivek Prakash #endif
73*97c7d358SVivek Prakash #endif				/* not lint */
74*97c7d358SVivek Prakash 
75*97c7d358SVivek Prakash /*
76*97c7d358SVivek Prakash  * Here we have the token scanner for indent.  It scans off one token and puts
77*97c7d358SVivek Prakash  * it in the global variable "token".  It returns a code, indicating the type
78*97c7d358SVivek Prakash  * of token scanned.
79*97c7d358SVivek Prakash  */
80*97c7d358SVivek Prakash 
81*97c7d358SVivek Prakash #include <stdio.h>
82*97c7d358SVivek Prakash #include <ctype.h>
83*97c7d358SVivek Prakash #include <stdlib.h>
84*97c7d358SVivek Prakash #include <string.h>
85*97c7d358SVivek Prakash #include "indent_globs.h"
86*97c7d358SVivek Prakash #include "indent_codes.h"
87*97c7d358SVivek Prakash 
/*
 * Character-class codes stored in the chartype[] table below.
 */
#define alphanum 1	/* compared against chartype[] entries by lexi() to
			 * recognise characters that start or continue an
			 * identifier or number */
#define opchar 3	/* value used in chartype[] for operator characters;
			 * the only reference in lexi() is inside an
			 * "#ifdef undef" section */
90*97c7d358SVivek Prakash 
/*
 * One entry of the reserved-word table: the spelling of the word and the
 * class code that lexi() switches on when the word is recognised.
 */
struct templ {
	const char	*rwd;
	int		rwcode;
};

/*
 * Reserved-word table searched by lexi() and extended by addkey().  The list
 * is terminated by an entry whose rwd is 0; the array is dimensioned at 1000
 * entries so that addkey() has room to append further words.
 *
 * Meaning of rwcode, as used by the switch in lexi():
 *	0	keyword that is returned as a plain identifier
 *	1	"switch"
 *	2	"case" / "default"
 *	3	"struct", "union", "enum"
 *	4	declaration keyword
 *	5	"if", "while", "for"
 *	6	"else", "do"
 *	7	"sizeof"
 */
struct templ specials[1000] =
{
	{"switch", 1},
	{"case", 2},
	{"break", 0},
	{"struct", 3},
	{"union", 3},
	{"enum", 3},
	{"default", 2},
	{"int", 4},
	{"char", 4},
	{"float", 4},
	{"double", 4},
	{"long", 4},
	{"short", 4},
	{"typedef", 4},		/* was misspelled "typdef", so the real
				 * keyword never matched this entry */
	{"unsigned", 4},
	{"register", 4},
	{"static", 4},
	{"global", 4},
	{"extern", 4},
	{"void", 4},
	{"goto", 0},
	{"return", 0},
	{"if", 5},
	{"while", 5},
	{"for", 5},
	{"else", 6},
	{"do", 6},
	{"sizeof", 7},
	{0, 0}
};
128*97c7d358SVivek Prakash 
/*
 * Classification of the 128 ASCII codes, indexed by character value:
 *	0	neither alphanumeric nor operator
 *	1	alphanumeric (letters, digits, '_' and '$')
 *	3	operator character
 * Each row below covers sixteen consecutive codes.
 */
char    chartype[128] =
{
	/* 0x00 - 0x0f: control characters */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 - 0x1f: control characters */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2f: space ! " # $ % & ' ( ) * + , - . / */
	0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
	/* 0x30 - 0x3f: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
	/* 0x40 - 0x4f: @ A B C D E F G H I J K L M N O */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 - 0x5f: P .. Z, '[', backslash, ']', '^', '_' */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
	/* 0x60 - 0x6f: ` a b c d e f g h i j k l m n o */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 - 0x7f: p .. z, '{', '|', '}', '~', DEL */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
150*97c7d358SVivek Prakash 
151*97c7d358SVivek Prakash 
152*97c7d358SVivek Prakash 
153*97c7d358SVivek Prakash 
154*97c7d358SVivek Prakash int
lexi(void)155*97c7d358SVivek Prakash lexi(void)
156*97c7d358SVivek Prakash {
157*97c7d358SVivek Prakash 	int     unary_delim;	/* this is set to 1 if the current token
158*97c7d358SVivek Prakash 				 *
159*97c7d358SVivek Prakash 				 * forces a following operator to be unary */
160*97c7d358SVivek Prakash 	static int last_code;	/* the last token type returned */
161*97c7d358SVivek Prakash 	static int l_struct;	/* set to 1 if the last token was 'struct' */
162*97c7d358SVivek Prakash 	int     code;		/* internal code to be returned */
163*97c7d358SVivek Prakash 	char    qchar;		/* the delimiter character for a string */
164*97c7d358SVivek Prakash 
165*97c7d358SVivek Prakash 	e_token = s_token;	/* point to start of place to save token */
166*97c7d358SVivek Prakash 	unary_delim = false;
167*97c7d358SVivek Prakash 	ps.col_1 = ps.last_nl;	/* tell world that this token started in
168*97c7d358SVivek Prakash 				 * column 1 iff the last thing scanned was nl */
169*97c7d358SVivek Prakash 	ps.last_nl = false;
170*97c7d358SVivek Prakash 
171*97c7d358SVivek Prakash 	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
172*97c7d358SVivek Prakash 		ps.col_1 = false;	/* leading blanks imply token is not
173*97c7d358SVivek Prakash 					 * in column 1 */
174*97c7d358SVivek Prakash 		if (++buf_ptr >= buf_end)
175*97c7d358SVivek Prakash 			fill_buffer();
176*97c7d358SVivek Prakash 	}
177*97c7d358SVivek Prakash 
178*97c7d358SVivek Prakash 	/* Scan an alphanumeric token */
179*97c7d358SVivek Prakash 	if (chartype[(int) *buf_ptr] == alphanum ||
180*97c7d358SVivek Prakash 	    (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
181*97c7d358SVivek Prakash 		/*
182*97c7d358SVivek Prakash 		 * we have a character or number
183*97c7d358SVivek Prakash 		 */
184*97c7d358SVivek Prakash 		const char *j;	/* used for searching thru list of
185*97c7d358SVivek Prakash 				 * reserved words */
186*97c7d358SVivek Prakash 		struct templ *p;
187*97c7d358SVivek Prakash 
188*97c7d358SVivek Prakash 		if (isdigit((unsigned char)*buf_ptr) ||
189*97c7d358SVivek Prakash 		    (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
190*97c7d358SVivek Prakash 			int     seendot = 0, seenexp = 0, seensfx = 0;
191*97c7d358SVivek Prakash 			if (*buf_ptr == '0' &&
192*97c7d358SVivek Prakash 			    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
193*97c7d358SVivek Prakash 				*e_token++ = *buf_ptr++;
194*97c7d358SVivek Prakash 				*e_token++ = *buf_ptr++;
195*97c7d358SVivek Prakash 				while (isxdigit((unsigned char)*buf_ptr)) {
196*97c7d358SVivek Prakash 					CHECK_SIZE_TOKEN;
197*97c7d358SVivek Prakash 					*e_token++ = *buf_ptr++;
198*97c7d358SVivek Prakash 				}
199*97c7d358SVivek Prakash 			} else {
200*97c7d358SVivek Prakash 				while (1) {
201*97c7d358SVivek Prakash 					if (*buf_ptr == '.') {
202*97c7d358SVivek Prakash 						if (seendot)
203*97c7d358SVivek Prakash 							break;
204*97c7d358SVivek Prakash 						else
205*97c7d358SVivek Prakash 							seendot++;
206*97c7d358SVivek Prakash 					}
207*97c7d358SVivek Prakash 					CHECK_SIZE_TOKEN;
208*97c7d358SVivek Prakash 					*e_token++ = *buf_ptr++;
209*97c7d358SVivek Prakash 					if (!isdigit((unsigned char)*buf_ptr)
210*97c7d358SVivek Prakash 					&& *buf_ptr != '.') {
211*97c7d358SVivek Prakash 						if ((*buf_ptr != 'E'
212*97c7d358SVivek Prakash 						&& *buf_ptr != 'e') || seenexp)
213*97c7d358SVivek Prakash 							break;
214*97c7d358SVivek Prakash 						else {
215*97c7d358SVivek Prakash 							seenexp++;
216*97c7d358SVivek Prakash 							seendot++;
217*97c7d358SVivek Prakash 							CHECK_SIZE_TOKEN;
218*97c7d358SVivek Prakash 							*e_token++ = *buf_ptr++;
219*97c7d358SVivek Prakash 							if (*buf_ptr == '+' || *buf_ptr == '-')
220*97c7d358SVivek Prakash 								*e_token++ = *buf_ptr++;
221*97c7d358SVivek Prakash 						}
222*97c7d358SVivek Prakash 					}
223*97c7d358SVivek Prakash 				}
224*97c7d358SVivek Prakash 			}
225*97c7d358SVivek Prakash 			if (*buf_ptr == 'F' || *buf_ptr == 'f') {
226*97c7d358SVivek Prakash 				/* float constant */
227*97c7d358SVivek Prakash 				*e_token++ = *buf_ptr++;
228*97c7d358SVivek Prakash 			} else {
229*97c7d358SVivek Prakash 				/* integer constant */
230*97c7d358SVivek Prakash 				while (1) {
231*97c7d358SVivek Prakash 					if (!(seensfx & 1) &&
232*97c7d358SVivek Prakash 					    (*buf_ptr == 'U' ||
233*97c7d358SVivek Prakash 					     *buf_ptr == 'u')) {
234*97c7d358SVivek Prakash 						CHECK_SIZE_TOKEN;
235*97c7d358SVivek Prakash 						*e_token++ = *buf_ptr++;
236*97c7d358SVivek Prakash 						seensfx |= 1;
237*97c7d358SVivek Prakash 						continue;
238*97c7d358SVivek Prakash 					}
239*97c7d358SVivek Prakash 					if (!(seensfx & 2) &&
240*97c7d358SVivek Prakash 					    (*buf_ptr == 'L' ||
241*97c7d358SVivek Prakash 					     *buf_ptr == 'l')) {
242*97c7d358SVivek Prakash 						CHECK_SIZE_TOKEN;
243*97c7d358SVivek Prakash 						if (buf_ptr[1] == buf_ptr[0])
244*97c7d358SVivek Prakash 							*e_token++ = *buf_ptr++;
245*97c7d358SVivek Prakash 						*e_token++ = *buf_ptr++;
246*97c7d358SVivek Prakash 						seensfx |= 2;
247*97c7d358SVivek Prakash 						continue;
248*97c7d358SVivek Prakash 					}
249*97c7d358SVivek Prakash 					break;
250*97c7d358SVivek Prakash 				}
251*97c7d358SVivek Prakash 			}
252*97c7d358SVivek Prakash 		} else
253*97c7d358SVivek Prakash 			while (chartype[(int) *buf_ptr] == alphanum) {	/* copy it over */
254*97c7d358SVivek Prakash 				CHECK_SIZE_TOKEN;
255*97c7d358SVivek Prakash 				*e_token++ = *buf_ptr++;
256*97c7d358SVivek Prakash 				if (buf_ptr >= buf_end)
257*97c7d358SVivek Prakash 					fill_buffer();
258*97c7d358SVivek Prakash 			}
259*97c7d358SVivek Prakash 		*e_token++ = '\0';
260*97c7d358SVivek Prakash 		while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
261*97c7d358SVivek Prakash 			if (++buf_ptr >= buf_end)
262*97c7d358SVivek Prakash 				fill_buffer();
263*97c7d358SVivek Prakash 		}
264*97c7d358SVivek Prakash 		ps.its_a_keyword = false;
265*97c7d358SVivek Prakash 		ps.sizeof_keyword = false;
266*97c7d358SVivek Prakash 		if (l_struct) {	/* if last token was 'struct', then this token
267*97c7d358SVivek Prakash 				 * should be treated as a declaration */
268*97c7d358SVivek Prakash 			l_struct = false;
269*97c7d358SVivek Prakash 			last_code = ident;
270*97c7d358SVivek Prakash 			ps.last_u_d = true;
271*97c7d358SVivek Prakash 			return (decl);
272*97c7d358SVivek Prakash 		}
273*97c7d358SVivek Prakash 		ps.last_u_d = false;	/* Operator after indentifier is
274*97c7d358SVivek Prakash 					 * binary */
275*97c7d358SVivek Prakash 		last_code = ident;	/* Remember that this is the code we
276*97c7d358SVivek Prakash 					 * will return */
277*97c7d358SVivek Prakash 
278*97c7d358SVivek Prakash 		/*
279*97c7d358SVivek Prakash 		 * This loop will check if the token is a keyword.
280*97c7d358SVivek Prakash 		 */
281*97c7d358SVivek Prakash 		for (p = specials; (j = p->rwd) != 0; p++) {
282*97c7d358SVivek Prakash 			char   *pt = s_token;	/* point at scanned token */
283*97c7d358SVivek Prakash 			if (*j++ != *pt++ || *j++ != *pt++)
284*97c7d358SVivek Prakash 				continue;	/* This test depends on the
285*97c7d358SVivek Prakash 						 * fact that identifiers are
286*97c7d358SVivek Prakash 						 * always at least 1 character
287*97c7d358SVivek Prakash 						 * long (ie. the first two
288*97c7d358SVivek Prakash 						 * bytes of the identifier are
289*97c7d358SVivek Prakash 						 * always meaningful) */
290*97c7d358SVivek Prakash 			if (pt[-1] == 0)
291*97c7d358SVivek Prakash 				break;	/* If its a one-character identifier */
292*97c7d358SVivek Prakash 			while (*pt++ == *j)
293*97c7d358SVivek Prakash 				if (*j++ == 0)
294*97c7d358SVivek Prakash 					goto found_keyword;	/* I wish that C had a
295*97c7d358SVivek Prakash 								 * multi-level break... */
296*97c7d358SVivek Prakash 		}
297*97c7d358SVivek Prakash 		if (p->rwd) {	/* we have a keyword */
298*97c7d358SVivek Prakash 	found_keyword:
299*97c7d358SVivek Prakash 			ps.its_a_keyword = true;
300*97c7d358SVivek Prakash 			ps.last_u_d = true;
301*97c7d358SVivek Prakash 			switch (p->rwcode) {
302*97c7d358SVivek Prakash 			case 1:/* it is a switch */
303*97c7d358SVivek Prakash 				return (swstmt);
304*97c7d358SVivek Prakash 			case 2:/* a case or default */
305*97c7d358SVivek Prakash 				return (casestmt);
306*97c7d358SVivek Prakash 
307*97c7d358SVivek Prakash 			case 3:/* a "struct" */
308*97c7d358SVivek Prakash 				if (ps.p_l_follow)
309*97c7d358SVivek Prakash 					break;	/* inside parens: cast */
310*97c7d358SVivek Prakash 				l_struct = true;
311*97c7d358SVivek Prakash 
312*97c7d358SVivek Prakash 				/*
313*97c7d358SVivek Prakash 				 * Next time around, we will want to know that we have had a
314*97c7d358SVivek Prakash 				 * 'struct'
315*97c7d358SVivek Prakash 				 */
316*97c7d358SVivek Prakash 			case 4:/* one of the declaration keywords */
317*97c7d358SVivek Prakash 				if (ps.p_l_follow) {
318*97c7d358SVivek Prakash 					ps.cast_mask |= 1 << ps.p_l_follow;
319*97c7d358SVivek Prakash 					break;	/* inside parens: cast */
320*97c7d358SVivek Prakash 				}
321*97c7d358SVivek Prakash 				last_code = decl;
322*97c7d358SVivek Prakash 				return (decl);
323*97c7d358SVivek Prakash 
324*97c7d358SVivek Prakash 			case 5:/* if, while, for */
325*97c7d358SVivek Prakash 				return (sp_paren);
326*97c7d358SVivek Prakash 
327*97c7d358SVivek Prakash 			case 6:/* do, else */
328*97c7d358SVivek Prakash 				return (sp_nparen);
329*97c7d358SVivek Prakash 
330*97c7d358SVivek Prakash 			case 7:
331*97c7d358SVivek Prakash 				ps.sizeof_keyword = true;
332*97c7d358SVivek Prakash 			default:	/* all others are treated like any
333*97c7d358SVivek Prakash 					 * other identifier */
334*97c7d358SVivek Prakash 				return (ident);
335*97c7d358SVivek Prakash 			}	/* end of switch */
336*97c7d358SVivek Prakash 		}		/* end of if (found_it) */
337*97c7d358SVivek Prakash 		if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
338*97c7d358SVivek Prakash 			char   *tp = buf_ptr;
339*97c7d358SVivek Prakash 			while (tp < buf_end)
340*97c7d358SVivek Prakash 				if (*tp++ == ')' && (*tp == ';' || *tp == ','))
341*97c7d358SVivek Prakash 					goto not_proc;
342*97c7d358SVivek Prakash 			strncpy(ps.procname, token, sizeof ps.procname - 1);
343*97c7d358SVivek Prakash 			ps.in_parameter_declaration = 1;
344*97c7d358SVivek Prakash 			rparen_count = 1;
345*97c7d358SVivek Prakash 	not_proc:	;
346*97c7d358SVivek Prakash 		}
347*97c7d358SVivek Prakash 		/*
348*97c7d358SVivek Prakash 		 * The following hack attempts to guess whether or not the current
349*97c7d358SVivek Prakash 		 * token is in fact a declaration keyword -- one that has been
350*97c7d358SVivek Prakash 		 * typedefd
351*97c7d358SVivek Prakash 		 */
352*97c7d358SVivek Prakash 		if (((*buf_ptr == '*' && buf_ptr[1] != '=') ||
353*97c7d358SVivek Prakash 		    isalpha((unsigned char)*buf_ptr) || *buf_ptr == '_')
354*97c7d358SVivek Prakash 		    && !ps.p_l_follow
355*97c7d358SVivek Prakash 		    && !ps.block_init
356*97c7d358SVivek Prakash 		    && (ps.last_token == rparen || ps.last_token == semicolon ||
357*97c7d358SVivek Prakash 			ps.last_token == decl ||
358*97c7d358SVivek Prakash 			ps.last_token == lbrace || ps.last_token == rbrace)) {
359*97c7d358SVivek Prakash 			ps.its_a_keyword = true;
360*97c7d358SVivek Prakash 			ps.last_u_d = true;
361*97c7d358SVivek Prakash 			last_code = decl;
362*97c7d358SVivek Prakash 			return decl;
363*97c7d358SVivek Prakash 		}
364*97c7d358SVivek Prakash 		if (last_code == decl)	/* if this is a declared variable,
365*97c7d358SVivek Prakash 					 * then following sign is unary */
366*97c7d358SVivek Prakash 			ps.last_u_d = true;	/* will make "int a -1" work */
367*97c7d358SVivek Prakash 		last_code = ident;
368*97c7d358SVivek Prakash 		return (ident);	/* the ident is not in the list */
369*97c7d358SVivek Prakash 	}			/* end of procesing for alpanum character */
370*97c7d358SVivek Prakash 	/* Scan a non-alphanumeric token */
371*97c7d358SVivek Prakash 	*e_token++ = *buf_ptr;	/* if it is only a one-character token, it is
372*97c7d358SVivek Prakash 				 * moved here */
373*97c7d358SVivek Prakash 	*e_token = '\0';
374*97c7d358SVivek Prakash 	if (++buf_ptr >= buf_end)
375*97c7d358SVivek Prakash 		fill_buffer();
376*97c7d358SVivek Prakash 
377*97c7d358SVivek Prakash 	switch (*token) {
378*97c7d358SVivek Prakash 	case '\n':
379*97c7d358SVivek Prakash 		unary_delim = ps.last_u_d;
380*97c7d358SVivek Prakash 		ps.last_nl = true;	/* remember that we just had a newline */
381*97c7d358SVivek Prakash 		code = (had_eof ? 0 : newline);
382*97c7d358SVivek Prakash 
383*97c7d358SVivek Prakash 		/*
384*97c7d358SVivek Prakash 		 * if data has been exausted, the newline is a dummy, and we should
385*97c7d358SVivek Prakash 		 * return code to stop
386*97c7d358SVivek Prakash 		 */
387*97c7d358SVivek Prakash 		break;
388*97c7d358SVivek Prakash 
389*97c7d358SVivek Prakash 	case '\'':		/* start of quoted character */
390*97c7d358SVivek Prakash 	case '"':		/* start of string */
391*97c7d358SVivek Prakash 		qchar = *token;
392*97c7d358SVivek Prakash 		if (troff) {
393*97c7d358SVivek Prakash 			e_token[-1] = '`';
394*97c7d358SVivek Prakash 			if (qchar == '"')
395*97c7d358SVivek Prakash 				*e_token++ = '`';
396*97c7d358SVivek Prakash 			e_token = chfont(&bodyf, &stringf, e_token);
397*97c7d358SVivek Prakash 		}
398*97c7d358SVivek Prakash 		do {		/* copy the string */
399*97c7d358SVivek Prakash 			while (1) {	/* move one character or
400*97c7d358SVivek Prakash 					 * [/<char>]<char> */
401*97c7d358SVivek Prakash 				if (*buf_ptr == '\n') {
402*97c7d358SVivek Prakash 					printf("%d: Unterminated literal\n", line_no);
403*97c7d358SVivek Prakash 					goto stop_lit;
404*97c7d358SVivek Prakash 				}
405*97c7d358SVivek Prakash 				CHECK_SIZE_TOKEN;	/* Only have to do this
406*97c7d358SVivek Prakash 							 * once in this loop,
407*97c7d358SVivek Prakash 							 * since CHECK_SIZE
408*97c7d358SVivek Prakash 							 * guarantees that there
409*97c7d358SVivek Prakash 							 * are at least 5
410*97c7d358SVivek Prakash 							 * entries left */
411*97c7d358SVivek Prakash 				*e_token = *buf_ptr++;
412*97c7d358SVivek Prakash 				if (buf_ptr >= buf_end)
413*97c7d358SVivek Prakash 					fill_buffer();
414*97c7d358SVivek Prakash 				if (*e_token == BACKSLASH) {	/* if escape, copy extra
415*97c7d358SVivek Prakash 								 * char */
416*97c7d358SVivek Prakash 					if (*buf_ptr == '\n')	/* check for escaped
417*97c7d358SVivek Prakash 								 * newline */
418*97c7d358SVivek Prakash 						++line_no;
419*97c7d358SVivek Prakash 					if (troff) {
420*97c7d358SVivek Prakash 						*++e_token = BACKSLASH;
421*97c7d358SVivek Prakash 						if (*buf_ptr == BACKSLASH)
422*97c7d358SVivek Prakash 							*++e_token = BACKSLASH;
423*97c7d358SVivek Prakash 					}
424*97c7d358SVivek Prakash 					*++e_token = *buf_ptr++;
425*97c7d358SVivek Prakash 					++e_token;	/* we must increment
426*97c7d358SVivek Prakash 							 * this again because we
427*97c7d358SVivek Prakash 							 * copied two chars */
428*97c7d358SVivek Prakash 					if (buf_ptr >= buf_end)
429*97c7d358SVivek Prakash 						fill_buffer();
430*97c7d358SVivek Prakash 				} else
431*97c7d358SVivek Prakash 					break;	/* we copied one character */
432*97c7d358SVivek Prakash 			}	/* end of while (1) */
433*97c7d358SVivek Prakash 		} while (*e_token++ != qchar);
434*97c7d358SVivek Prakash 		if (troff) {
435*97c7d358SVivek Prakash 			e_token = chfont(&stringf, &bodyf, e_token - 1);
436*97c7d358SVivek Prakash 			if (qchar == '"')
437*97c7d358SVivek Prakash 				*e_token++ = '\'';
438*97c7d358SVivek Prakash 		}
439*97c7d358SVivek Prakash stop_lit:
440*97c7d358SVivek Prakash 		code = ident;
441*97c7d358SVivek Prakash 		break;
442*97c7d358SVivek Prakash 
443*97c7d358SVivek Prakash 	case ('('):
444*97c7d358SVivek Prakash 	case ('['):
445*97c7d358SVivek Prakash 		unary_delim = true;
446*97c7d358SVivek Prakash 		code = lparen;
447*97c7d358SVivek Prakash 		break;
448*97c7d358SVivek Prakash 
449*97c7d358SVivek Prakash 	case (')'):
450*97c7d358SVivek Prakash 	case (']'):
451*97c7d358SVivek Prakash 		code = rparen;
452*97c7d358SVivek Prakash 		break;
453*97c7d358SVivek Prakash 
454*97c7d358SVivek Prakash 	case '#':
455*97c7d358SVivek Prakash 		unary_delim = ps.last_u_d;
456*97c7d358SVivek Prakash 		code = preesc;
457*97c7d358SVivek Prakash 		break;
458*97c7d358SVivek Prakash 
459*97c7d358SVivek Prakash 	case '?':
460*97c7d358SVivek Prakash 		unary_delim = true;
461*97c7d358SVivek Prakash 		code = question;
462*97c7d358SVivek Prakash 		break;
463*97c7d358SVivek Prakash 
464*97c7d358SVivek Prakash 	case (':'):
465*97c7d358SVivek Prakash 		code = colon;
466*97c7d358SVivek Prakash 		unary_delim = true;
467*97c7d358SVivek Prakash 		break;
468*97c7d358SVivek Prakash 
469*97c7d358SVivek Prakash 	case (';'):
470*97c7d358SVivek Prakash 		unary_delim = true;
471*97c7d358SVivek Prakash 		code = semicolon;
472*97c7d358SVivek Prakash 		break;
473*97c7d358SVivek Prakash 
474*97c7d358SVivek Prakash 	case ('{'):
475*97c7d358SVivek Prakash 		unary_delim = true;
476*97c7d358SVivek Prakash 
477*97c7d358SVivek Prakash 		/*
478*97c7d358SVivek Prakash 		 * if (ps.in_or_st) ps.block_init = 1;
479*97c7d358SVivek Prakash 		 */
480*97c7d358SVivek Prakash 		/* ?	code = ps.block_init ? lparen : lbrace; */
481*97c7d358SVivek Prakash 		code = lbrace;
482*97c7d358SVivek Prakash 		break;
483*97c7d358SVivek Prakash 
484*97c7d358SVivek Prakash 	case ('}'):
485*97c7d358SVivek Prakash 		unary_delim = true;
486*97c7d358SVivek Prakash 		/* ?	code = ps.block_init ? rparen : rbrace; */
487*97c7d358SVivek Prakash 		code = rbrace;
488*97c7d358SVivek Prakash 		break;
489*97c7d358SVivek Prakash 
490*97c7d358SVivek Prakash 	case 014:		/* a form feed */
491*97c7d358SVivek Prakash 		unary_delim = ps.last_u_d;
492*97c7d358SVivek Prakash 		ps.last_nl = true;	/* remember this so we can set
493*97c7d358SVivek Prakash 					 * 'ps.col_1' right */
494*97c7d358SVivek Prakash 		code = form_feed;
495*97c7d358SVivek Prakash 		break;
496*97c7d358SVivek Prakash 
497*97c7d358SVivek Prakash 	case (','):
498*97c7d358SVivek Prakash 		unary_delim = true;
499*97c7d358SVivek Prakash 		code = comma;
500*97c7d358SVivek Prakash 		break;
501*97c7d358SVivek Prakash 
502*97c7d358SVivek Prakash 	case '.':
503*97c7d358SVivek Prakash 		unary_delim = false;
504*97c7d358SVivek Prakash 		code = period;
505*97c7d358SVivek Prakash 		break;
506*97c7d358SVivek Prakash 
507*97c7d358SVivek Prakash 	case '-':
508*97c7d358SVivek Prakash 	case '+':		/* check for -, +, --, ++ */
509*97c7d358SVivek Prakash 		code = (ps.last_u_d ? unary_op : binary_op);
510*97c7d358SVivek Prakash 		unary_delim = true;
511*97c7d358SVivek Prakash 
512*97c7d358SVivek Prakash 		if (*buf_ptr == token[0]) {
513*97c7d358SVivek Prakash 			/* check for doubled character */
514*97c7d358SVivek Prakash 			*e_token++ = *buf_ptr++;
515*97c7d358SVivek Prakash 			/* buffer overflow will be checked at end of loop */
516*97c7d358SVivek Prakash 			if (last_code == ident || last_code == rparen) {
517*97c7d358SVivek Prakash 				code = (ps.last_u_d ? unary_op : postop);
518*97c7d358SVivek Prakash 				/* check for following ++ or -- */
519*97c7d358SVivek Prakash 				unary_delim = false;
520*97c7d358SVivek Prakash 			}
521*97c7d358SVivek Prakash 		} else
522*97c7d358SVivek Prakash 			if (*buf_ptr == '=')
523*97c7d358SVivek Prakash 				/* check for operator += */
524*97c7d358SVivek Prakash 				*e_token++ = *buf_ptr++;
525*97c7d358SVivek Prakash 			else
526*97c7d358SVivek Prakash 				if (*buf_ptr == '>') {
527*97c7d358SVivek Prakash 					/* check for operator -> */
528*97c7d358SVivek Prakash 					*e_token++ = *buf_ptr++;
529*97c7d358SVivek Prakash 					if (!pointer_as_binop) {
530*97c7d358SVivek Prakash 						unary_delim = false;
531*97c7d358SVivek Prakash 						code = unary_op;
532*97c7d358SVivek Prakash 						ps.want_blank = false;
533*97c7d358SVivek Prakash 					}
534*97c7d358SVivek Prakash 				}
535*97c7d358SVivek Prakash 		break;		/* buffer overflow will be checked at end of
536*97c7d358SVivek Prakash 				 * switch */
537*97c7d358SVivek Prakash 
538*97c7d358SVivek Prakash 	case '=':
539*97c7d358SVivek Prakash 		if (ps.in_or_st)
540*97c7d358SVivek Prakash 			ps.block_init = 1;
541*97c7d358SVivek Prakash #ifdef undef
542*97c7d358SVivek Prakash 		if (chartype[*buf_ptr] == opchar) {	/* we have two char
543*97c7d358SVivek Prakash 							 * assignment */
544*97c7d358SVivek Prakash 			e_token[-1] = *buf_ptr++;
545*97c7d358SVivek Prakash 			if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
546*97c7d358SVivek Prakash 				*e_token++ = *buf_ptr++;
547*97c7d358SVivek Prakash 			*e_token++ = '=';	/* Flip =+ to += */
548*97c7d358SVivek Prakash 			*e_token = 0;
549*97c7d358SVivek Prakash 		}
550*97c7d358SVivek Prakash #else
551*97c7d358SVivek Prakash 		if (*buf_ptr == '=') {	/* == */
552*97c7d358SVivek Prakash 			*e_token++ = '=';	/* Flip =+ to += */
553*97c7d358SVivek Prakash 			buf_ptr++;
554*97c7d358SVivek Prakash 			*e_token = 0;
555*97c7d358SVivek Prakash 		}
556*97c7d358SVivek Prakash #endif
557*97c7d358SVivek Prakash 		code = binary_op;
558*97c7d358SVivek Prakash 		unary_delim = true;
559*97c7d358SVivek Prakash 		break;
560*97c7d358SVivek Prakash 		/* can drop thru!!! */
561*97c7d358SVivek Prakash 
562*97c7d358SVivek Prakash 	case '>':
563*97c7d358SVivek Prakash 	case '<':
564*97c7d358SVivek Prakash 	case '!':		/* ops like <, <<, <=, !=, etc */
565*97c7d358SVivek Prakash 		if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
566*97c7d358SVivek Prakash 			*e_token++ = *buf_ptr;
567*97c7d358SVivek Prakash 			if (++buf_ptr >= buf_end)
568*97c7d358SVivek Prakash 				fill_buffer();
569*97c7d358SVivek Prakash 		}
570*97c7d358SVivek Prakash 		if (*buf_ptr == '=')
571*97c7d358SVivek Prakash 			*e_token++ = *buf_ptr++;
572*97c7d358SVivek Prakash 		code = (ps.last_u_d ? unary_op : binary_op);
573*97c7d358SVivek Prakash 		unary_delim = true;
574*97c7d358SVivek Prakash 		break;
575*97c7d358SVivek Prakash 
576*97c7d358SVivek Prakash 	default:
577*97c7d358SVivek Prakash 		if (token[0] == '/' && *buf_ptr == '*') {
578*97c7d358SVivek Prakash 			/* it is start of comment */
579*97c7d358SVivek Prakash 			*e_token++ = '*';
580*97c7d358SVivek Prakash 
581*97c7d358SVivek Prakash 			if (++buf_ptr >= buf_end)
582*97c7d358SVivek Prakash 				fill_buffer();
583*97c7d358SVivek Prakash 
584*97c7d358SVivek Prakash 			code = comment;
585*97c7d358SVivek Prakash 			unary_delim = ps.last_u_d;
586*97c7d358SVivek Prakash 			break;
587*97c7d358SVivek Prakash 		}
588*97c7d358SVivek Prakash 		while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
589*97c7d358SVivek Prakash 			/*
590*97c7d358SVivek Prakash 		         * handle ||, &&, etc, and also things as in int *****i
591*97c7d358SVivek Prakash 		         */
592*97c7d358SVivek Prakash 			*e_token++ = *buf_ptr;
593*97c7d358SVivek Prakash 			if (++buf_ptr >= buf_end)
594*97c7d358SVivek Prakash 				fill_buffer();
595*97c7d358SVivek Prakash 		}
596*97c7d358SVivek Prakash 		code = (ps.last_u_d ? unary_op : binary_op);
597*97c7d358SVivek Prakash 		unary_delim = true;
598*97c7d358SVivek Prakash 
599*97c7d358SVivek Prakash 
600*97c7d358SVivek Prakash 	}			/* end of switch */
601*97c7d358SVivek Prakash 	if (code != newline) {
602*97c7d358SVivek Prakash 		l_struct = false;
603*97c7d358SVivek Prakash 		last_code = code;
604*97c7d358SVivek Prakash 	}
605*97c7d358SVivek Prakash 	if (buf_ptr >= buf_end)	/* check for input buffer empty */
606*97c7d358SVivek Prakash 		fill_buffer();
607*97c7d358SVivek Prakash 	ps.last_u_d = unary_delim;
608*97c7d358SVivek Prakash 	*e_token = '\0';	/* null terminate the token */
609*97c7d358SVivek Prakash 	return (code);
610*97c7d358SVivek Prakash }
611*97c7d358SVivek Prakash /*
612*97c7d358SVivek Prakash  * Add the given keyword to the keyword table, using val as the keyword type
613*97c7d358SVivek Prakash  */
614*97c7d358SVivek Prakash void
addkey(char * key,int val)615*97c7d358SVivek Prakash addkey(char *key, int val)
616*97c7d358SVivek Prakash {
617*97c7d358SVivek Prakash 	struct templ *p = specials;
618*97c7d358SVivek Prakash 	while (p->rwd)
619*97c7d358SVivek Prakash 		if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
620*97c7d358SVivek Prakash 			return;
621*97c7d358SVivek Prakash 		else
622*97c7d358SVivek Prakash 			p++;
623*97c7d358SVivek Prakash 	if (p >= specials + sizeof specials / sizeof specials[0])
624*97c7d358SVivek Prakash 		return;		/* For now, table overflows are silently
625*97c7d358SVivek Prakash 				 * ignored */
626*97c7d358SVivek Prakash 	p->rwd = key;
627*97c7d358SVivek Prakash 	p->rwcode = val;
628*97c7d358SVivek Prakash 	p[1].rwd = 0;
629*97c7d358SVivek Prakash 	p[1].rwcode = 0;
630*97c7d358SVivek Prakash }
631