1*97c7d358SVivek Prakash /* $NetBSD: lexi.c,v 1.13 2009/04/12 11:09:49 lukem Exp $ */
2*97c7d358SVivek Prakash
3*97c7d358SVivek Prakash /*
4*97c7d358SVivek Prakash * Copyright (c) 1980, 1993
5*97c7d358SVivek Prakash * The Regents of the University of California. All rights reserved.
6*97c7d358SVivek Prakash *
7*97c7d358SVivek Prakash * Redistribution and use in source and binary forms, with or without
8*97c7d358SVivek Prakash * modification, are permitted provided that the following conditions
9*97c7d358SVivek Prakash * are met:
10*97c7d358SVivek Prakash * 1. Redistributions of source code must retain the above copyright
11*97c7d358SVivek Prakash * notice, this list of conditions and the following disclaimer.
12*97c7d358SVivek Prakash * 2. Redistributions in binary form must reproduce the above copyright
13*97c7d358SVivek Prakash * notice, this list of conditions and the following disclaimer in the
14*97c7d358SVivek Prakash * documentation and/or other materials provided with the distribution.
15*97c7d358SVivek Prakash * 3. Neither the name of the University nor the names of its contributors
16*97c7d358SVivek Prakash * may be used to endorse or promote products derived from this software
17*97c7d358SVivek Prakash * without specific prior written permission.
18*97c7d358SVivek Prakash *
19*97c7d358SVivek Prakash * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20*97c7d358SVivek Prakash * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21*97c7d358SVivek Prakash * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22*97c7d358SVivek Prakash * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23*97c7d358SVivek Prakash * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24*97c7d358SVivek Prakash * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25*97c7d358SVivek Prakash * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26*97c7d358SVivek Prakash * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27*97c7d358SVivek Prakash * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28*97c7d358SVivek Prakash * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29*97c7d358SVivek Prakash * SUCH DAMAGE.
30*97c7d358SVivek Prakash */
31*97c7d358SVivek Prakash
32*97c7d358SVivek Prakash /*
33*97c7d358SVivek Prakash * Copyright (c) 1976 Board of Trustees of the University of Illinois.
34*97c7d358SVivek Prakash * Copyright (c) 1985 Sun Microsystems, Inc.
35*97c7d358SVivek Prakash * All rights reserved.
36*97c7d358SVivek Prakash *
37*97c7d358SVivek Prakash * Redistribution and use in source and binary forms, with or without
38*97c7d358SVivek Prakash * modification, are permitted provided that the following conditions
39*97c7d358SVivek Prakash * are met:
40*97c7d358SVivek Prakash * 1. Redistributions of source code must retain the above copyright
41*97c7d358SVivek Prakash * notice, this list of conditions and the following disclaimer.
42*97c7d358SVivek Prakash * 2. Redistributions in binary form must reproduce the above copyright
43*97c7d358SVivek Prakash * notice, this list of conditions and the following disclaimer in the
44*97c7d358SVivek Prakash * documentation and/or other materials provided with the distribution.
45*97c7d358SVivek Prakash * 3. All advertising materials mentioning features or use of this software
46*97c7d358SVivek Prakash * must display the following acknowledgement:
47*97c7d358SVivek Prakash * This product includes software developed by the University of
48*97c7d358SVivek Prakash * California, Berkeley and its contributors.
49*97c7d358SVivek Prakash * 4. Neither the name of the University nor the names of its contributors
50*97c7d358SVivek Prakash * may be used to endorse or promote products derived from this software
51*97c7d358SVivek Prakash * without specific prior written permission.
52*97c7d358SVivek Prakash *
53*97c7d358SVivek Prakash * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54*97c7d358SVivek Prakash * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55*97c7d358SVivek Prakash * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56*97c7d358SVivek Prakash * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57*97c7d358SVivek Prakash * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58*97c7d358SVivek Prakash * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59*97c7d358SVivek Prakash * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60*97c7d358SVivek Prakash * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61*97c7d358SVivek Prakash * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62*97c7d358SVivek Prakash * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63*97c7d358SVivek Prakash * SUCH DAMAGE.
64*97c7d358SVivek Prakash */
65*97c7d358SVivek Prakash
66*97c7d358SVivek Prakash #include <sys/cdefs.h>
67*97c7d358SVivek Prakash #ifndef lint
68*97c7d358SVivek Prakash #if 0
69*97c7d358SVivek Prakash static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
70*97c7d358SVivek Prakash #else
71*97c7d358SVivek Prakash __RCSID("$NetBSD: lexi.c,v 1.13 2009/04/12 11:09:49 lukem Exp $");
72*97c7d358SVivek Prakash #endif
73*97c7d358SVivek Prakash #endif /* not lint */
74*97c7d358SVivek Prakash
75*97c7d358SVivek Prakash /*
76*97c7d358SVivek Prakash * Here we have the token scanner for indent. It scans off one token and puts
77*97c7d358SVivek Prakash * it in the global variable "token". It returns a code, indicating the type
78*97c7d358SVivek Prakash * of token scanned.
79*97c7d358SVivek Prakash */
80*97c7d358SVivek Prakash
81*97c7d358SVivek Prakash #include <stdio.h>
82*97c7d358SVivek Prakash #include <ctype.h>
83*97c7d358SVivek Prakash #include <stdlib.h>
84*97c7d358SVivek Prakash #include <string.h>
85*97c7d358SVivek Prakash #include "indent_globs.h"
86*97c7d358SVivek Prakash #include "indent_codes.h"
87*97c7d358SVivek Prakash
/*
 * Character classes stored in chartype[]: lexi() compares table entries
 * against these values.  Any other character has class 0.
 */
#define alphanum 1
#define opchar 3

/*
 * One entry of the reserved-word table.
 */
struct templ {
	const char *rwd;	/* keyword spelling; a null pointer ends the
				 * table */
	int     rwcode;		/* keyword class, dispatched on by lexi() */
};

/*
 * Reserved words known to the scanner.  The rwcode classes, as handled by
 * the switch in lexi(), are:
 *
 *	0	no special treatment, returned as a plain identifier
 *	1	"switch"
 *	2	"case" and "default"
 *	3	"struct", "union", "enum" (the following token is a tag)
 *	4	declaration keywords
 *	5	"if", "while", "for"
 *	6	"else", "do"
 *	7	"sizeof"
 *
 * The table is oversized so that addkey() can append further keywords at
 * run time; the entry with a null rwd is always the current end.
 */
struct templ specials[1000] =
{
	{"switch", 1},
	{"case", 2},
	{"break", 0},
	{"struct", 3},
	{"union", 3},
	{"enum", 3},
	{"default", 2},
	{"int", 4},
	{"char", 4},
	{"float", 4},
	{"double", 4},
	{"long", 4},
	{"short", 4},
	{"typedef", 4},		/* was misspelled "typdef", so the keyword
				 * was never matched through this table */
	{"unsigned", 4},
	{"register", 4},
	{"static", 4},
	{"global", 4},
	{"extern", 4},
	{"void", 4},
	{"goto", 0},
	{"return", 0},
	{"if", 5},
	{"while", 5},
	{"for", 5},
	{"else", 6},
	{"do", 6},
	{"sizeof", 7},
	{0, 0}
};
128*97c7d358SVivek Prakash
/*
 * Per-character classification table, indexed by 7-bit character code.
 * It lets the scanner decide quickly what kind of token a character can
 * start or continue:
 *
 *	1	part of an identifier or number (letters, digits, '_', '$')
 *	3	operator character
 *	0	everything else (blanks, brackets, quotes, punctuation, ...)
 *
 * Entries not listed below are implicitly zero.
 */
char    chartype[128] =
{
	/* identifier / number characters */
	['$'] = 1, ['_'] = 1,

	['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1,
	['5'] = 1, ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1,

	['A'] = 1, ['B'] = 1, ['C'] = 1, ['D'] = 1, ['E'] = 1, ['F'] = 1,
	['G'] = 1, ['H'] = 1, ['I'] = 1, ['J'] = 1, ['K'] = 1, ['L'] = 1,
	['M'] = 1, ['N'] = 1, ['O'] = 1, ['P'] = 1, ['Q'] = 1, ['R'] = 1,
	['S'] = 1, ['T'] = 1, ['U'] = 1, ['V'] = 1, ['W'] = 1, ['X'] = 1,
	['Y'] = 1, ['Z'] = 1,

	['a'] = 1, ['b'] = 1, ['c'] = 1, ['d'] = 1, ['e'] = 1, ['f'] = 1,
	['g'] = 1, ['h'] = 1, ['i'] = 1, ['j'] = 1, ['k'] = 1, ['l'] = 1,
	['m'] = 1, ['n'] = 1, ['o'] = 1, ['p'] = 1, ['q'] = 1, ['r'] = 1,
	['s'] = 1, ['t'] = 1, ['u'] = 1, ['v'] = 1, ['w'] = 1, ['x'] = 1,
	['y'] = 1, ['z'] = 1,

	/* operator characters */
	['!'] = 3, ['%'] = 3, ['&'] = 3, ['*'] = 3, ['+'] = 3,
	['-'] = 3, ['/'] = 3, ['<'] = 3, ['='] = 3, ['>'] = 3,
	['?'] = 3, ['^'] = 3, ['|'] = 3, ['~'] = 3
};
150*97c7d358SVivek Prakash
151*97c7d358SVivek Prakash
152*97c7d358SVivek Prakash
153*97c7d358SVivek Prakash
154*97c7d358SVivek Prakash int
lexi(void)155*97c7d358SVivek Prakash lexi(void)
156*97c7d358SVivek Prakash {
157*97c7d358SVivek Prakash int unary_delim; /* this is set to 1 if the current token
158*97c7d358SVivek Prakash *
159*97c7d358SVivek Prakash * forces a following operator to be unary */
160*97c7d358SVivek Prakash static int last_code; /* the last token type returned */
161*97c7d358SVivek Prakash static int l_struct; /* set to 1 if the last token was 'struct' */
162*97c7d358SVivek Prakash int code; /* internal code to be returned */
163*97c7d358SVivek Prakash char qchar; /* the delimiter character for a string */
164*97c7d358SVivek Prakash
165*97c7d358SVivek Prakash e_token = s_token; /* point to start of place to save token */
166*97c7d358SVivek Prakash unary_delim = false;
167*97c7d358SVivek Prakash ps.col_1 = ps.last_nl; /* tell world that this token started in
168*97c7d358SVivek Prakash * column 1 iff the last thing scanned was nl */
169*97c7d358SVivek Prakash ps.last_nl = false;
170*97c7d358SVivek Prakash
171*97c7d358SVivek Prakash while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
172*97c7d358SVivek Prakash ps.col_1 = false; /* leading blanks imply token is not
173*97c7d358SVivek Prakash * in column 1 */
174*97c7d358SVivek Prakash if (++buf_ptr >= buf_end)
175*97c7d358SVivek Prakash fill_buffer();
176*97c7d358SVivek Prakash }
177*97c7d358SVivek Prakash
178*97c7d358SVivek Prakash /* Scan an alphanumeric token */
179*97c7d358SVivek Prakash if (chartype[(int) *buf_ptr] == alphanum ||
180*97c7d358SVivek Prakash (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
181*97c7d358SVivek Prakash /*
182*97c7d358SVivek Prakash * we have a character or number
183*97c7d358SVivek Prakash */
184*97c7d358SVivek Prakash const char *j; /* used for searching thru list of
185*97c7d358SVivek Prakash * reserved words */
186*97c7d358SVivek Prakash struct templ *p;
187*97c7d358SVivek Prakash
188*97c7d358SVivek Prakash if (isdigit((unsigned char)*buf_ptr) ||
189*97c7d358SVivek Prakash (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
190*97c7d358SVivek Prakash int seendot = 0, seenexp = 0, seensfx = 0;
191*97c7d358SVivek Prakash if (*buf_ptr == '0' &&
192*97c7d358SVivek Prakash (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
193*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
194*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
195*97c7d358SVivek Prakash while (isxdigit((unsigned char)*buf_ptr)) {
196*97c7d358SVivek Prakash CHECK_SIZE_TOKEN;
197*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
198*97c7d358SVivek Prakash }
199*97c7d358SVivek Prakash } else {
200*97c7d358SVivek Prakash while (1) {
201*97c7d358SVivek Prakash if (*buf_ptr == '.') {
202*97c7d358SVivek Prakash if (seendot)
203*97c7d358SVivek Prakash break;
204*97c7d358SVivek Prakash else
205*97c7d358SVivek Prakash seendot++;
206*97c7d358SVivek Prakash }
207*97c7d358SVivek Prakash CHECK_SIZE_TOKEN;
208*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
209*97c7d358SVivek Prakash if (!isdigit((unsigned char)*buf_ptr)
210*97c7d358SVivek Prakash && *buf_ptr != '.') {
211*97c7d358SVivek Prakash if ((*buf_ptr != 'E'
212*97c7d358SVivek Prakash && *buf_ptr != 'e') || seenexp)
213*97c7d358SVivek Prakash break;
214*97c7d358SVivek Prakash else {
215*97c7d358SVivek Prakash seenexp++;
216*97c7d358SVivek Prakash seendot++;
217*97c7d358SVivek Prakash CHECK_SIZE_TOKEN;
218*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
219*97c7d358SVivek Prakash if (*buf_ptr == '+' || *buf_ptr == '-')
220*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
221*97c7d358SVivek Prakash }
222*97c7d358SVivek Prakash }
223*97c7d358SVivek Prakash }
224*97c7d358SVivek Prakash }
225*97c7d358SVivek Prakash if (*buf_ptr == 'F' || *buf_ptr == 'f') {
226*97c7d358SVivek Prakash /* float constant */
227*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
228*97c7d358SVivek Prakash } else {
229*97c7d358SVivek Prakash /* integer constant */
230*97c7d358SVivek Prakash while (1) {
231*97c7d358SVivek Prakash if (!(seensfx & 1) &&
232*97c7d358SVivek Prakash (*buf_ptr == 'U' ||
233*97c7d358SVivek Prakash *buf_ptr == 'u')) {
234*97c7d358SVivek Prakash CHECK_SIZE_TOKEN;
235*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
236*97c7d358SVivek Prakash seensfx |= 1;
237*97c7d358SVivek Prakash continue;
238*97c7d358SVivek Prakash }
239*97c7d358SVivek Prakash if (!(seensfx & 2) &&
240*97c7d358SVivek Prakash (*buf_ptr == 'L' ||
241*97c7d358SVivek Prakash *buf_ptr == 'l')) {
242*97c7d358SVivek Prakash CHECK_SIZE_TOKEN;
243*97c7d358SVivek Prakash if (buf_ptr[1] == buf_ptr[0])
244*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
245*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
246*97c7d358SVivek Prakash seensfx |= 2;
247*97c7d358SVivek Prakash continue;
248*97c7d358SVivek Prakash }
249*97c7d358SVivek Prakash break;
250*97c7d358SVivek Prakash }
251*97c7d358SVivek Prakash }
252*97c7d358SVivek Prakash } else
253*97c7d358SVivek Prakash while (chartype[(int) *buf_ptr] == alphanum) { /* copy it over */
254*97c7d358SVivek Prakash CHECK_SIZE_TOKEN;
255*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
256*97c7d358SVivek Prakash if (buf_ptr >= buf_end)
257*97c7d358SVivek Prakash fill_buffer();
258*97c7d358SVivek Prakash }
259*97c7d358SVivek Prakash *e_token++ = '\0';
260*97c7d358SVivek Prakash while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
261*97c7d358SVivek Prakash if (++buf_ptr >= buf_end)
262*97c7d358SVivek Prakash fill_buffer();
263*97c7d358SVivek Prakash }
264*97c7d358SVivek Prakash ps.its_a_keyword = false;
265*97c7d358SVivek Prakash ps.sizeof_keyword = false;
266*97c7d358SVivek Prakash if (l_struct) { /* if last token was 'struct', then this token
267*97c7d358SVivek Prakash * should be treated as a declaration */
268*97c7d358SVivek Prakash l_struct = false;
269*97c7d358SVivek Prakash last_code = ident;
270*97c7d358SVivek Prakash ps.last_u_d = true;
271*97c7d358SVivek Prakash return (decl);
272*97c7d358SVivek Prakash }
273*97c7d358SVivek Prakash ps.last_u_d = false; /* Operator after indentifier is
274*97c7d358SVivek Prakash * binary */
275*97c7d358SVivek Prakash last_code = ident; /* Remember that this is the code we
276*97c7d358SVivek Prakash * will return */
277*97c7d358SVivek Prakash
278*97c7d358SVivek Prakash /*
279*97c7d358SVivek Prakash * This loop will check if the token is a keyword.
280*97c7d358SVivek Prakash */
281*97c7d358SVivek Prakash for (p = specials; (j = p->rwd) != 0; p++) {
282*97c7d358SVivek Prakash char *pt = s_token; /* point at scanned token */
283*97c7d358SVivek Prakash if (*j++ != *pt++ || *j++ != *pt++)
284*97c7d358SVivek Prakash continue; /* This test depends on the
285*97c7d358SVivek Prakash * fact that identifiers are
286*97c7d358SVivek Prakash * always at least 1 character
287*97c7d358SVivek Prakash * long (ie. the first two
288*97c7d358SVivek Prakash * bytes of the identifier are
289*97c7d358SVivek Prakash * always meaningful) */
290*97c7d358SVivek Prakash if (pt[-1] == 0)
291*97c7d358SVivek Prakash break; /* If its a one-character identifier */
292*97c7d358SVivek Prakash while (*pt++ == *j)
293*97c7d358SVivek Prakash if (*j++ == 0)
294*97c7d358SVivek Prakash goto found_keyword; /* I wish that C had a
295*97c7d358SVivek Prakash * multi-level break... */
296*97c7d358SVivek Prakash }
297*97c7d358SVivek Prakash if (p->rwd) { /* we have a keyword */
298*97c7d358SVivek Prakash found_keyword:
299*97c7d358SVivek Prakash ps.its_a_keyword = true;
300*97c7d358SVivek Prakash ps.last_u_d = true;
301*97c7d358SVivek Prakash switch (p->rwcode) {
302*97c7d358SVivek Prakash case 1:/* it is a switch */
303*97c7d358SVivek Prakash return (swstmt);
304*97c7d358SVivek Prakash case 2:/* a case or default */
305*97c7d358SVivek Prakash return (casestmt);
306*97c7d358SVivek Prakash
307*97c7d358SVivek Prakash case 3:/* a "struct" */
308*97c7d358SVivek Prakash if (ps.p_l_follow)
309*97c7d358SVivek Prakash break; /* inside parens: cast */
310*97c7d358SVivek Prakash l_struct = true;
311*97c7d358SVivek Prakash
312*97c7d358SVivek Prakash /*
313*97c7d358SVivek Prakash * Next time around, we will want to know that we have had a
314*97c7d358SVivek Prakash * 'struct'
315*97c7d358SVivek Prakash */
316*97c7d358SVivek Prakash case 4:/* one of the declaration keywords */
317*97c7d358SVivek Prakash if (ps.p_l_follow) {
318*97c7d358SVivek Prakash ps.cast_mask |= 1 << ps.p_l_follow;
319*97c7d358SVivek Prakash break; /* inside parens: cast */
320*97c7d358SVivek Prakash }
321*97c7d358SVivek Prakash last_code = decl;
322*97c7d358SVivek Prakash return (decl);
323*97c7d358SVivek Prakash
324*97c7d358SVivek Prakash case 5:/* if, while, for */
325*97c7d358SVivek Prakash return (sp_paren);
326*97c7d358SVivek Prakash
327*97c7d358SVivek Prakash case 6:/* do, else */
328*97c7d358SVivek Prakash return (sp_nparen);
329*97c7d358SVivek Prakash
330*97c7d358SVivek Prakash case 7:
331*97c7d358SVivek Prakash ps.sizeof_keyword = true;
332*97c7d358SVivek Prakash default: /* all others are treated like any
333*97c7d358SVivek Prakash * other identifier */
334*97c7d358SVivek Prakash return (ident);
335*97c7d358SVivek Prakash } /* end of switch */
336*97c7d358SVivek Prakash } /* end of if (found_it) */
337*97c7d358SVivek Prakash if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
338*97c7d358SVivek Prakash char *tp = buf_ptr;
339*97c7d358SVivek Prakash while (tp < buf_end)
340*97c7d358SVivek Prakash if (*tp++ == ')' && (*tp == ';' || *tp == ','))
341*97c7d358SVivek Prakash goto not_proc;
342*97c7d358SVivek Prakash strncpy(ps.procname, token, sizeof ps.procname - 1);
343*97c7d358SVivek Prakash ps.in_parameter_declaration = 1;
344*97c7d358SVivek Prakash rparen_count = 1;
345*97c7d358SVivek Prakash not_proc: ;
346*97c7d358SVivek Prakash }
347*97c7d358SVivek Prakash /*
348*97c7d358SVivek Prakash * The following hack attempts to guess whether or not the current
349*97c7d358SVivek Prakash * token is in fact a declaration keyword -- one that has been
350*97c7d358SVivek Prakash * typedefd
351*97c7d358SVivek Prakash */
352*97c7d358SVivek Prakash if (((*buf_ptr == '*' && buf_ptr[1] != '=') ||
353*97c7d358SVivek Prakash isalpha((unsigned char)*buf_ptr) || *buf_ptr == '_')
354*97c7d358SVivek Prakash && !ps.p_l_follow
355*97c7d358SVivek Prakash && !ps.block_init
356*97c7d358SVivek Prakash && (ps.last_token == rparen || ps.last_token == semicolon ||
357*97c7d358SVivek Prakash ps.last_token == decl ||
358*97c7d358SVivek Prakash ps.last_token == lbrace || ps.last_token == rbrace)) {
359*97c7d358SVivek Prakash ps.its_a_keyword = true;
360*97c7d358SVivek Prakash ps.last_u_d = true;
361*97c7d358SVivek Prakash last_code = decl;
362*97c7d358SVivek Prakash return decl;
363*97c7d358SVivek Prakash }
364*97c7d358SVivek Prakash if (last_code == decl) /* if this is a declared variable,
365*97c7d358SVivek Prakash * then following sign is unary */
366*97c7d358SVivek Prakash ps.last_u_d = true; /* will make "int a -1" work */
367*97c7d358SVivek Prakash last_code = ident;
368*97c7d358SVivek Prakash return (ident); /* the ident is not in the list */
369*97c7d358SVivek Prakash } /* end of procesing for alpanum character */
370*97c7d358SVivek Prakash /* Scan a non-alphanumeric token */
371*97c7d358SVivek Prakash *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
372*97c7d358SVivek Prakash * moved here */
373*97c7d358SVivek Prakash *e_token = '\0';
374*97c7d358SVivek Prakash if (++buf_ptr >= buf_end)
375*97c7d358SVivek Prakash fill_buffer();
376*97c7d358SVivek Prakash
377*97c7d358SVivek Prakash switch (*token) {
378*97c7d358SVivek Prakash case '\n':
379*97c7d358SVivek Prakash unary_delim = ps.last_u_d;
380*97c7d358SVivek Prakash ps.last_nl = true; /* remember that we just had a newline */
381*97c7d358SVivek Prakash code = (had_eof ? 0 : newline);
382*97c7d358SVivek Prakash
383*97c7d358SVivek Prakash /*
384*97c7d358SVivek Prakash * if data has been exausted, the newline is a dummy, and we should
385*97c7d358SVivek Prakash * return code to stop
386*97c7d358SVivek Prakash */
387*97c7d358SVivek Prakash break;
388*97c7d358SVivek Prakash
389*97c7d358SVivek Prakash case '\'': /* start of quoted character */
390*97c7d358SVivek Prakash case '"': /* start of string */
391*97c7d358SVivek Prakash qchar = *token;
392*97c7d358SVivek Prakash if (troff) {
393*97c7d358SVivek Prakash e_token[-1] = '`';
394*97c7d358SVivek Prakash if (qchar == '"')
395*97c7d358SVivek Prakash *e_token++ = '`';
396*97c7d358SVivek Prakash e_token = chfont(&bodyf, &stringf, e_token);
397*97c7d358SVivek Prakash }
398*97c7d358SVivek Prakash do { /* copy the string */
399*97c7d358SVivek Prakash while (1) { /* move one character or
400*97c7d358SVivek Prakash * [/<char>]<char> */
401*97c7d358SVivek Prakash if (*buf_ptr == '\n') {
402*97c7d358SVivek Prakash printf("%d: Unterminated literal\n", line_no);
403*97c7d358SVivek Prakash goto stop_lit;
404*97c7d358SVivek Prakash }
405*97c7d358SVivek Prakash CHECK_SIZE_TOKEN; /* Only have to do this
406*97c7d358SVivek Prakash * once in this loop,
407*97c7d358SVivek Prakash * since CHECK_SIZE
408*97c7d358SVivek Prakash * guarantees that there
409*97c7d358SVivek Prakash * are at least 5
410*97c7d358SVivek Prakash * entries left */
411*97c7d358SVivek Prakash *e_token = *buf_ptr++;
412*97c7d358SVivek Prakash if (buf_ptr >= buf_end)
413*97c7d358SVivek Prakash fill_buffer();
414*97c7d358SVivek Prakash if (*e_token == BACKSLASH) { /* if escape, copy extra
415*97c7d358SVivek Prakash * char */
416*97c7d358SVivek Prakash if (*buf_ptr == '\n') /* check for escaped
417*97c7d358SVivek Prakash * newline */
418*97c7d358SVivek Prakash ++line_no;
419*97c7d358SVivek Prakash if (troff) {
420*97c7d358SVivek Prakash *++e_token = BACKSLASH;
421*97c7d358SVivek Prakash if (*buf_ptr == BACKSLASH)
422*97c7d358SVivek Prakash *++e_token = BACKSLASH;
423*97c7d358SVivek Prakash }
424*97c7d358SVivek Prakash *++e_token = *buf_ptr++;
425*97c7d358SVivek Prakash ++e_token; /* we must increment
426*97c7d358SVivek Prakash * this again because we
427*97c7d358SVivek Prakash * copied two chars */
428*97c7d358SVivek Prakash if (buf_ptr >= buf_end)
429*97c7d358SVivek Prakash fill_buffer();
430*97c7d358SVivek Prakash } else
431*97c7d358SVivek Prakash break; /* we copied one character */
432*97c7d358SVivek Prakash } /* end of while (1) */
433*97c7d358SVivek Prakash } while (*e_token++ != qchar);
434*97c7d358SVivek Prakash if (troff) {
435*97c7d358SVivek Prakash e_token = chfont(&stringf, &bodyf, e_token - 1);
436*97c7d358SVivek Prakash if (qchar == '"')
437*97c7d358SVivek Prakash *e_token++ = '\'';
438*97c7d358SVivek Prakash }
439*97c7d358SVivek Prakash stop_lit:
440*97c7d358SVivek Prakash code = ident;
441*97c7d358SVivek Prakash break;
442*97c7d358SVivek Prakash
443*97c7d358SVivek Prakash case ('('):
444*97c7d358SVivek Prakash case ('['):
445*97c7d358SVivek Prakash unary_delim = true;
446*97c7d358SVivek Prakash code = lparen;
447*97c7d358SVivek Prakash break;
448*97c7d358SVivek Prakash
449*97c7d358SVivek Prakash case (')'):
450*97c7d358SVivek Prakash case (']'):
451*97c7d358SVivek Prakash code = rparen;
452*97c7d358SVivek Prakash break;
453*97c7d358SVivek Prakash
454*97c7d358SVivek Prakash case '#':
455*97c7d358SVivek Prakash unary_delim = ps.last_u_d;
456*97c7d358SVivek Prakash code = preesc;
457*97c7d358SVivek Prakash break;
458*97c7d358SVivek Prakash
459*97c7d358SVivek Prakash case '?':
460*97c7d358SVivek Prakash unary_delim = true;
461*97c7d358SVivek Prakash code = question;
462*97c7d358SVivek Prakash break;
463*97c7d358SVivek Prakash
464*97c7d358SVivek Prakash case (':'):
465*97c7d358SVivek Prakash code = colon;
466*97c7d358SVivek Prakash unary_delim = true;
467*97c7d358SVivek Prakash break;
468*97c7d358SVivek Prakash
469*97c7d358SVivek Prakash case (';'):
470*97c7d358SVivek Prakash unary_delim = true;
471*97c7d358SVivek Prakash code = semicolon;
472*97c7d358SVivek Prakash break;
473*97c7d358SVivek Prakash
474*97c7d358SVivek Prakash case ('{'):
475*97c7d358SVivek Prakash unary_delim = true;
476*97c7d358SVivek Prakash
477*97c7d358SVivek Prakash /*
478*97c7d358SVivek Prakash * if (ps.in_or_st) ps.block_init = 1;
479*97c7d358SVivek Prakash */
480*97c7d358SVivek Prakash /* ? code = ps.block_init ? lparen : lbrace; */
481*97c7d358SVivek Prakash code = lbrace;
482*97c7d358SVivek Prakash break;
483*97c7d358SVivek Prakash
484*97c7d358SVivek Prakash case ('}'):
485*97c7d358SVivek Prakash unary_delim = true;
486*97c7d358SVivek Prakash /* ? code = ps.block_init ? rparen : rbrace; */
487*97c7d358SVivek Prakash code = rbrace;
488*97c7d358SVivek Prakash break;
489*97c7d358SVivek Prakash
490*97c7d358SVivek Prakash case 014: /* a form feed */
491*97c7d358SVivek Prakash unary_delim = ps.last_u_d;
492*97c7d358SVivek Prakash ps.last_nl = true; /* remember this so we can set
493*97c7d358SVivek Prakash * 'ps.col_1' right */
494*97c7d358SVivek Prakash code = form_feed;
495*97c7d358SVivek Prakash break;
496*97c7d358SVivek Prakash
497*97c7d358SVivek Prakash case (','):
498*97c7d358SVivek Prakash unary_delim = true;
499*97c7d358SVivek Prakash code = comma;
500*97c7d358SVivek Prakash break;
501*97c7d358SVivek Prakash
502*97c7d358SVivek Prakash case '.':
503*97c7d358SVivek Prakash unary_delim = false;
504*97c7d358SVivek Prakash code = period;
505*97c7d358SVivek Prakash break;
506*97c7d358SVivek Prakash
507*97c7d358SVivek Prakash case '-':
508*97c7d358SVivek Prakash case '+': /* check for -, +, --, ++ */
509*97c7d358SVivek Prakash code = (ps.last_u_d ? unary_op : binary_op);
510*97c7d358SVivek Prakash unary_delim = true;
511*97c7d358SVivek Prakash
512*97c7d358SVivek Prakash if (*buf_ptr == token[0]) {
513*97c7d358SVivek Prakash /* check for doubled character */
514*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
515*97c7d358SVivek Prakash /* buffer overflow will be checked at end of loop */
516*97c7d358SVivek Prakash if (last_code == ident || last_code == rparen) {
517*97c7d358SVivek Prakash code = (ps.last_u_d ? unary_op : postop);
518*97c7d358SVivek Prakash /* check for following ++ or -- */
519*97c7d358SVivek Prakash unary_delim = false;
520*97c7d358SVivek Prakash }
521*97c7d358SVivek Prakash } else
522*97c7d358SVivek Prakash if (*buf_ptr == '=')
523*97c7d358SVivek Prakash /* check for operator += */
524*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
525*97c7d358SVivek Prakash else
526*97c7d358SVivek Prakash if (*buf_ptr == '>') {
527*97c7d358SVivek Prakash /* check for operator -> */
528*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
529*97c7d358SVivek Prakash if (!pointer_as_binop) {
530*97c7d358SVivek Prakash unary_delim = false;
531*97c7d358SVivek Prakash code = unary_op;
532*97c7d358SVivek Prakash ps.want_blank = false;
533*97c7d358SVivek Prakash }
534*97c7d358SVivek Prakash }
535*97c7d358SVivek Prakash break; /* buffer overflow will be checked at end of
536*97c7d358SVivek Prakash * switch */
537*97c7d358SVivek Prakash
538*97c7d358SVivek Prakash case '=':
539*97c7d358SVivek Prakash if (ps.in_or_st)
540*97c7d358SVivek Prakash ps.block_init = 1;
541*97c7d358SVivek Prakash #ifdef undef
542*97c7d358SVivek Prakash if (chartype[*buf_ptr] == opchar) { /* we have two char
543*97c7d358SVivek Prakash * assignment */
544*97c7d358SVivek Prakash e_token[-1] = *buf_ptr++;
545*97c7d358SVivek Prakash if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
546*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
547*97c7d358SVivek Prakash *e_token++ = '='; /* Flip =+ to += */
548*97c7d358SVivek Prakash *e_token = 0;
549*97c7d358SVivek Prakash }
550*97c7d358SVivek Prakash #else
551*97c7d358SVivek Prakash if (*buf_ptr == '=') { /* == */
552*97c7d358SVivek Prakash *e_token++ = '='; /* Flip =+ to += */
553*97c7d358SVivek Prakash buf_ptr++;
554*97c7d358SVivek Prakash *e_token = 0;
555*97c7d358SVivek Prakash }
556*97c7d358SVivek Prakash #endif
557*97c7d358SVivek Prakash code = binary_op;
558*97c7d358SVivek Prakash unary_delim = true;
559*97c7d358SVivek Prakash break;
560*97c7d358SVivek Prakash /* can drop thru!!! */
561*97c7d358SVivek Prakash
562*97c7d358SVivek Prakash case '>':
563*97c7d358SVivek Prakash case '<':
564*97c7d358SVivek Prakash case '!': /* ops like <, <<, <=, !=, etc */
565*97c7d358SVivek Prakash if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
566*97c7d358SVivek Prakash *e_token++ = *buf_ptr;
567*97c7d358SVivek Prakash if (++buf_ptr >= buf_end)
568*97c7d358SVivek Prakash fill_buffer();
569*97c7d358SVivek Prakash }
570*97c7d358SVivek Prakash if (*buf_ptr == '=')
571*97c7d358SVivek Prakash *e_token++ = *buf_ptr++;
572*97c7d358SVivek Prakash code = (ps.last_u_d ? unary_op : binary_op);
573*97c7d358SVivek Prakash unary_delim = true;
574*97c7d358SVivek Prakash break;
575*97c7d358SVivek Prakash
576*97c7d358SVivek Prakash default:
577*97c7d358SVivek Prakash if (token[0] == '/' && *buf_ptr == '*') {
578*97c7d358SVivek Prakash /* it is start of comment */
579*97c7d358SVivek Prakash *e_token++ = '*';
580*97c7d358SVivek Prakash
581*97c7d358SVivek Prakash if (++buf_ptr >= buf_end)
582*97c7d358SVivek Prakash fill_buffer();
583*97c7d358SVivek Prakash
584*97c7d358SVivek Prakash code = comment;
585*97c7d358SVivek Prakash unary_delim = ps.last_u_d;
586*97c7d358SVivek Prakash break;
587*97c7d358SVivek Prakash }
588*97c7d358SVivek Prakash while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
589*97c7d358SVivek Prakash /*
590*97c7d358SVivek Prakash * handle ||, &&, etc, and also things as in int *****i
591*97c7d358SVivek Prakash */
592*97c7d358SVivek Prakash *e_token++ = *buf_ptr;
593*97c7d358SVivek Prakash if (++buf_ptr >= buf_end)
594*97c7d358SVivek Prakash fill_buffer();
595*97c7d358SVivek Prakash }
596*97c7d358SVivek Prakash code = (ps.last_u_d ? unary_op : binary_op);
597*97c7d358SVivek Prakash unary_delim = true;
598*97c7d358SVivek Prakash
599*97c7d358SVivek Prakash
600*97c7d358SVivek Prakash } /* end of switch */
601*97c7d358SVivek Prakash if (code != newline) {
602*97c7d358SVivek Prakash l_struct = false;
603*97c7d358SVivek Prakash last_code = code;
604*97c7d358SVivek Prakash }
605*97c7d358SVivek Prakash if (buf_ptr >= buf_end) /* check for input buffer empty */
606*97c7d358SVivek Prakash fill_buffer();
607*97c7d358SVivek Prakash ps.last_u_d = unary_delim;
608*97c7d358SVivek Prakash *e_token = '\0'; /* null terminate the token */
609*97c7d358SVivek Prakash return (code);
610*97c7d358SVivek Prakash }
611*97c7d358SVivek Prakash /*
612*97c7d358SVivek Prakash * Add the given keyword to the keyword table, using val as the keyword type
613*97c7d358SVivek Prakash */
614*97c7d358SVivek Prakash void
addkey(char * key,int val)615*97c7d358SVivek Prakash addkey(char *key, int val)
616*97c7d358SVivek Prakash {
617*97c7d358SVivek Prakash struct templ *p = specials;
618*97c7d358SVivek Prakash while (p->rwd)
619*97c7d358SVivek Prakash if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
620*97c7d358SVivek Prakash return;
621*97c7d358SVivek Prakash else
622*97c7d358SVivek Prakash p++;
623*97c7d358SVivek Prakash if (p >= specials + sizeof specials / sizeof specials[0])
624*97c7d358SVivek Prakash return; /* For now, table overflows are silently
625*97c7d358SVivek Prakash * ignored */
626*97c7d358SVivek Prakash p->rwd = key;
627*97c7d358SVivek Prakash p->rwcode = val;
628*97c7d358SVivek Prakash p[1].rwd = 0;
629*97c7d358SVivek Prakash p[1].rwcode = 0;
630*97c7d358SVivek Prakash }
631