xref: /csrg-svn/usr.bin/indent/lexi.c (revision 35500)
/*
 * Copyright (c) 1985 Sun Microsystems, Inc.
 * Copyright (c) 1980 The Regents of the University of California.
 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley, the University of Illinois,
 * Urbana, and Sun Microsystems, Inc.  The name of either University
 * or Sun Microsystems may not be used to endorse or promote products
 * derived from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */
208804Smckusick 
/* SCCS identification string; compiled out when running lint. */
#ifndef lint
static char sccsid[] = "@(#)lexi.c	5.9 (Berkeley) 09/15/88";
#endif /* not lint */
2421970Sdist 
/*
 * Here we have the token scanner for indent.  It scans off one token and puts
 * it in the global variable "token".  It returns a code indicating the type
 * of token scanned.
 */
308804Smckusick 
#include "indent_globs.h"
#include "indent_codes.h"
#include "ctype.h"
#include <string.h>
348804Smckusick 
/*
 * Character class codes stored in chartype[].  A value of 0 in the table
 * means the character belongs to neither class.
 */
#define alphanum 1		/* character may appear in an identifier or
				 * number */
#define opchar 3		/* character may appear in an operator */
378804Smckusick 
/*
 * One entry of the reserved word table: the spelling of the word and the
 * class code that lexi() switches on when the word is recognised.
 */
struct templ {
    char       *rwd;		/* spelling of the reserved word; a null
				 * pointer terminates the table */
    int         rwcode;		/* class of the word, see below */
};

/*
 * Reserved word table.  The entries in use are followed by a terminating
 * entry whose rwd is 0; addkey() appends further entries in front of that
 * terminator, which is why the array is larger than its initialiser.
 *
 * rwcode values, as interpreted by lexi():
 *	0	no special treatment (returned as an ordinary identifier)
 *	1	switch
 *	2	case, default
 *	3	struct, union, enum
 *	4	other declaration keywords
 *	5	if, while, for
 *	6	else, do
 *	7	sizeof
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
758804Smckusick 
/*
 * Character classification table, indexed by 7-bit ASCII code.  It is used to
 * facilitate the decision of what type each character is: 1 (alphanum) for
 * characters that may appear in identifiers and numbers, 3 (opchar) for
 * characters that may appear in operators, and 0 for everything else.
 */
char        chartype[128] =
{
    0, 0, 0, 0, 0, 0, 0, 0,	/* 000 - 007 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 010 - 017 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 020 - 027 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 030 - 037 */
    0, 3, 0, 0, 1, 3, 3, 0,	/* sp ! " # $ % & ' */
    0, 0, 3, 3, 0, 3, 0, 3,	/* ( ) * + , - . / */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0 1 2 3 4 5 6 7 */
    1, 1, 0, 0, 3, 3, 3, 3,	/* 8 9 : ; < = > ? */
    0, 1, 1, 1, 1, 1, 1, 1,	/* @ A B C D E F G */
    1, 1, 1, 1, 1, 1, 1, 1,	/* H I J K L M N O */
    1, 1, 1, 1, 1, 1, 1, 1,	/* P Q R S T U V W */
    1, 1, 1, 0, 0, 0, 3, 1,	/* X Y Z [ \ ] ^ _ */
    0, 1, 1, 1, 1, 1, 1, 1,	/* ` a b c d e f g */
    1, 1, 1, 1, 1, 1, 1, 1,	/* h i j k l m n o */
    1, 1, 1, 1, 1, 1, 1, 1,	/* p q r s t u v w */
    1, 1, 1, 0, 3, 0, 3, 0	/* x y z { | } ~ DEL */
};
978804Smckusick 
988804Smckusick 
998804Smckusick 
1008804Smckusick 
101*35500Sbostic int
10224455Smckusick lexi()
10324455Smckusick {
10424455Smckusick     register char *tok;		/* local pointer to next char in token */
105*35500Sbostic     int         unary_delim;	/* this is set to 1 if the current token
106*35500Sbostic 				 *
10724455Smckusick 				 * forces a following operator to be unary */
10824455Smckusick     static int  last_code;	/* the last token type returned */
10924455Smckusick     static int  l_struct;	/* set to 1 if the last token was 'struct' */
11024455Smckusick     int         code;		/* internal code to be returned */
11124455Smckusick     char        qchar;		/* the delimiter character for a string */
1128804Smckusick 
11324455Smckusick     tok = token;		/* point to start of place to save token */
1148804Smckusick     unary_delim = false;
11524455Smckusick     ps.col_1 = ps.last_nl;	/* tell world that this token started in
116*35500Sbostic 				 * column 1 iff the last thing scanned was nl */
11724455Smckusick     ps.last_nl = false;
1188804Smckusick 
11924455Smckusick     while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
120*35500Sbostic 	ps.col_1 = false;	/* leading blanks imply token is not in column
121*35500Sbostic 				 * 1 */
1228804Smckusick 	if (++buf_ptr >= buf_end)
12324455Smckusick 	    fill_buffer();
1248804Smckusick     }
1258804Smckusick 
126*35500Sbostic     /* Scan an alphanumeric token */
127*35500Sbostic     if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
128*35500Sbostic 	/*
129*35500Sbostic 	 * we have a character or number
130*35500Sbostic 	 */
131*35500Sbostic 	register char *j;	/* used for searching thru list of
132*35500Sbostic 				 *
13324455Smckusick 				 * reserved words */
13424455Smckusick 	register struct templ *p;
1358804Smckusick 
136*35500Sbostic 	if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
137*35500Sbostic 	    int         seendot = 0,
138*35500Sbostic 	                seenexp = 0;
139*35500Sbostic 	    if (*buf_ptr == '0' &&
140*35500Sbostic 		    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
141*35500Sbostic 		*tok++ = *buf_ptr++;
142*35500Sbostic 		*tok++ = *buf_ptr++;
143*35500Sbostic 		while (isxdigit(*buf_ptr))
144*35500Sbostic 		    *tok++ = *buf_ptr++;
145*35500Sbostic 	    }
146*35500Sbostic 	    else
147*35500Sbostic 		while (1) {
148*35500Sbostic 		    if (*buf_ptr == '.')
149*35500Sbostic 			if (seendot)
150*35500Sbostic 			    break;
151*35500Sbostic 			else
152*35500Sbostic 			    seendot++;
153*35500Sbostic 		    *tok++ = *buf_ptr++;
154*35500Sbostic 		    if (!isdigit(*buf_ptr) && *buf_ptr != '.')
155*35500Sbostic 			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
156*35500Sbostic 			    break;
157*35500Sbostic 			else {
158*35500Sbostic 			    seenexp++;
159*35500Sbostic 			    seendot++;
160*35500Sbostic 			    *tok++ = *buf_ptr++;
161*35500Sbostic 			    if (*buf_ptr == '+' || *buf_ptr == '-')
162*35500Sbostic 				*tok++ = *buf_ptr++;
163*35500Sbostic 			}
164*35500Sbostic 		}
165*35500Sbostic 	    if (*buf_ptr == 'L' || *buf_ptr == 'l')
166*35500Sbostic 		*tok++ = *buf_ptr++;
167*35500Sbostic 	}
168*35500Sbostic 	else
169*35500Sbostic 	    while (chartype[*buf_ptr] == alphanum) {	/* copy it over */
170*35500Sbostic 		*tok++ = *buf_ptr++;
171*35500Sbostic 		if (buf_ptr >= buf_end)
172*35500Sbostic 		    fill_buffer();
173*35500Sbostic 	    }
1748804Smckusick 	*tok++ = '\0';
17524455Smckusick 	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
17624455Smckusick 	    if (++buf_ptr >= buf_end)
17724455Smckusick 		fill_buffer();
17824455Smckusick 	}
17924455Smckusick 	ps.its_a_keyword = false;
18024455Smckusick 	ps.sizeof_keyword = false;
181*35500Sbostic 	if (l_struct) {		/* if last token was 'struct', then this token
182*35500Sbostic 				 * should be treated as a declaration */
1838804Smckusick 	    l_struct = false;
1848804Smckusick 	    last_code = ident;
18524455Smckusick 	    ps.last_u_d = true;
1868804Smckusick 	    return (decl);
1878804Smckusick 	}
18824455Smckusick 	ps.last_u_d = false;	/* Operator after indentifier is binary */
18924455Smckusick 	last_code = ident;	/* Remember that this is the code we will
19024455Smckusick 				 * return */
1918804Smckusick 
19224455Smckusick 	/*
193*35500Sbostic 	 * This loop will check if the token is a keyword.
19424455Smckusick 	 */
19524455Smckusick 	for (p = specials; (j = p->rwd) != 0; p++) {
19624455Smckusick 	    tok = token;	/* point at scanned token */
19724455Smckusick 	    if (*j++ != *tok++ || *j++ != *tok++)
19824455Smckusick 		continue;	/* This test depends on the fact that
199*35500Sbostic 				 * identifiers are always at least 1 character
200*35500Sbostic 				 * long (ie. the first two bytes of the
201*35500Sbostic 				 * identifier are always meaningful) */
20224455Smckusick 	    if (tok[-1] == 0)
20324455Smckusick 		break;		/* If its a one-character identifier */
20424455Smckusick 	    while (*tok++ == *j)
20524455Smckusick 		if (*j++ == 0)
20624455Smckusick 		    goto found_keyword;	/* I wish that C had a multi-level
20724455Smckusick 					 * break... */
20824455Smckusick 	}
20924455Smckusick 	if (p->rwd) {		/* we have a keyword */
21024455Smckusick     found_keyword:
21124455Smckusick 	    ps.its_a_keyword = true;
21224455Smckusick 	    ps.last_u_d = true;
21324455Smckusick 	    switch (p->rwcode) {
214*35500Sbostic 	    case 1:		/* it is a switch */
215*35500Sbostic 		return (swstmt);
216*35500Sbostic 	    case 2:		/* a case or default */
217*35500Sbostic 		return (casestmt);
2188804Smckusick 
219*35500Sbostic 	    case 3:		/* a "struct" */
220*35500Sbostic 		if (ps.p_l_follow)
221*35500Sbostic 		    break;	/* inside parens: cast */
222*35500Sbostic 		l_struct = true;
2238804Smckusick 
224*35500Sbostic 		/*
225*35500Sbostic 		 * Next time around, we will want to know that we have had a
226*35500Sbostic 		 * 'struct'
227*35500Sbostic 		 */
228*35500Sbostic 	    case 4:		/* one of the declaration keywords */
229*35500Sbostic 		if (ps.p_l_follow) {
230*35500Sbostic 		    ps.cast_mask |= 1 << ps.p_l_follow;
231*35500Sbostic 		    break;	/* inside parens: cast */
232*35500Sbostic 		}
233*35500Sbostic 		last_code = decl;
234*35500Sbostic 		return (decl);
2358804Smckusick 
236*35500Sbostic 	    case 5:		/* if, while, for */
237*35500Sbostic 		return (sp_paren);
2388804Smckusick 
239*35500Sbostic 	    case 6:		/* do, else */
240*35500Sbostic 		return (sp_nparen);
2418804Smckusick 
242*35500Sbostic 	    case 7:
243*35500Sbostic 		ps.sizeof_keyword = true;
244*35500Sbostic 	    default:		/* all others are treated like any other
24524455Smckusick 				 * identifier */
246*35500Sbostic 		return (ident);
24724455Smckusick 	    }			/* end of switch */
24824455Smckusick 	}			/* end of if (found_it) */
249*35500Sbostic 	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
250*35500Sbostic 	    register char *p = buf_ptr;
251*35500Sbostic 	    while (p < buf_end)
252*35500Sbostic 		if (*p++ == ')' && *p == ';')
253*35500Sbostic 		    goto not_proc;
25424455Smckusick 	    strncpy(ps.procname, token, sizeof ps.procname - 1);
25524455Smckusick 	    ps.in_parameter_declaration = 1;
256*35500Sbostic     not_proc:;
25724455Smckusick 	}
25824455Smckusick 	/*
25924455Smckusick 	 * The following hack attempts to guess whether or not the current
26024455Smckusick 	 * token is in fact a declaration keyword -- one that has been
261*35500Sbostic 	 * typedefd
26224455Smckusick 	 */
263*35500Sbostic 	if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
264*35500Sbostic 		&& !ps.p_l_follow
265*35500Sbostic 	        && !ps.block_init
266*35500Sbostic 		&& (ps.last_token == rparen || ps.last_token == semicolon ||
267*35500Sbostic 		    ps.last_token == decl ||
268*35500Sbostic 		    ps.last_token == lbrace || ps.last_token == rbrace)) {
26924455Smckusick 	    ps.its_a_keyword = true;
27024455Smckusick 	    ps.last_u_d = true;
27124455Smckusick 	    last_code = decl;
27224455Smckusick 	    return decl;
2738804Smckusick 	}
27424455Smckusick 	if (last_code == decl)	/* if this is a declared variable, then
27524455Smckusick 				 * following sign is unary */
27624455Smckusick 	    ps.last_u_d = true;	/* will make "int a -1" work */
2778804Smckusick 	last_code = ident;
27824455Smckusick 	return (ident);		/* the ident is not in the list */
27924455Smckusick     }				/* end of procesing for alpanum character */
280*35500Sbostic     /* l l l Scan a non-alphanumeric token */
2818804Smckusick 
282*35500Sbostic     *tok++ = *buf_ptr;		/* if it is only a one-character token, it is
283*35500Sbostic 				 * moved here */
2848804Smckusick     *tok = '\0';
2858804Smckusick     if (++buf_ptr >= buf_end)
28624455Smckusick 	fill_buffer();
2878804Smckusick 
2888804Smckusick     switch (*token) {
289*35500Sbostic     case '\n':
290*35500Sbostic 	unary_delim = ps.last_u_d;
291*35500Sbostic 	ps.last_nl = true;	/* remember that we just had a newline */
292*35500Sbostic 	code = (had_eof ? 0 : newline);
29324455Smckusick 
294*35500Sbostic 	/*
295*35500Sbostic 	 * if data has been exausted, the newline is a dummy, and we should
296*35500Sbostic 	 * return code to stop
297*35500Sbostic 	 */
298*35500Sbostic 	break;
2998804Smckusick 
300*35500Sbostic     case '\'':			/* start of quoted character */
301*35500Sbostic     case '"':			/* start of string */
302*35500Sbostic 	qchar = *token;
303*35500Sbostic 	if (troff) {
304*35500Sbostic 	    tok[-1] = '`';
305*35500Sbostic 	    if (qchar == '"')
306*35500Sbostic 		*tok++ = '`';
307*35500Sbostic 	    tok = chfont(&bodyf, &stringf, tok);
308*35500Sbostic 	}
309*35500Sbostic 	do {			/* copy the string */
310*35500Sbostic 	    while (1) {		/* move one character or [/<char>]<char> */
311*35500Sbostic 		if (*buf_ptr == '\n') {
312*35500Sbostic 		    printf("%d: Unterminated literal\n", line_no);
313*35500Sbostic 		    goto stop_lit;
314*35500Sbostic 		}
315*35500Sbostic 		*tok = *buf_ptr++;
316*35500Sbostic 		if (buf_ptr >= buf_end)
317*35500Sbostic 		    fill_buffer();
318*35500Sbostic 		if (had_eof || ((tok - token) > (bufsize - 2))) {
319*35500Sbostic 		    printf("Unterminated literal\n");
320*35500Sbostic 		    ++tok;
321*35500Sbostic 		    goto stop_lit;
322*35500Sbostic 		    /* get outof literal copying loop */
323*35500Sbostic 		}
324*35500Sbostic 		if (*tok == BACKSLASH) {	/* if escape, copy extra char */
325*35500Sbostic 		    if (*buf_ptr == '\n')	/* check for escaped newline */
326*35500Sbostic 			++line_no;
327*35500Sbostic 		    if (troff) {
328*35500Sbostic 			*++tok = BACKSLASH;
329*35500Sbostic 			if (*buf_ptr == BACKSLASH)
330*35500Sbostic 			    *++tok = BACKSLASH;
3318804Smckusick 		    }
332*35500Sbostic 		    *++tok = *buf_ptr++;
333*35500Sbostic 		    ++tok;	/* we must increment this again because we
334*35500Sbostic 				 * copied two chars */
3358804Smckusick 		    if (buf_ptr >= buf_end)
33624455Smckusick 			fill_buffer();
337*35500Sbostic 		}
338*35500Sbostic 		else
339*35500Sbostic 		    break;	/* we copied one character */
340*35500Sbostic 	    }			/* end of while (1) */
341*35500Sbostic 	} while (*tok++ != qchar);
342*35500Sbostic 	if (troff) {
343*35500Sbostic 	    tok = chfont(&stringf, &bodyf, tok - 1);
344*35500Sbostic 	    if (qchar == '"')
34524455Smckusick 		*tok++ = '\'';
346*35500Sbostic 	}
347*35500Sbostic stop_lit:
348*35500Sbostic 	code = ident;
349*35500Sbostic 	break;
3508804Smckusick 
351*35500Sbostic     case ('('):
352*35500Sbostic     case ('['):
353*35500Sbostic 	unary_delim = true;
354*35500Sbostic 	code = lparen;
355*35500Sbostic 	break;
3568804Smckusick 
357*35500Sbostic     case (')'):
358*35500Sbostic     case (']'):
359*35500Sbostic 	code = rparen;
360*35500Sbostic 	break;
3618804Smckusick 
362*35500Sbostic     case '#':
363*35500Sbostic 	unary_delim = ps.last_u_d;
364*35500Sbostic 	code = preesc;
365*35500Sbostic 	break;
3668804Smckusick 
367*35500Sbostic     case '?':
368*35500Sbostic 	unary_delim = true;
369*35500Sbostic 	code = question;
370*35500Sbostic 	break;
3718804Smckusick 
372*35500Sbostic     case (':'):
373*35500Sbostic 	code = colon;
374*35500Sbostic 	unary_delim = true;
375*35500Sbostic 	break;
3768804Smckusick 
377*35500Sbostic     case (';'):
378*35500Sbostic 	unary_delim = true;
379*35500Sbostic 	code = semicolon;
380*35500Sbostic 	break;
3818804Smckusick 
382*35500Sbostic     case ('{'):
383*35500Sbostic 	unary_delim = true;
38424455Smckusick 
385*35500Sbostic 	/*
386*35500Sbostic 	 * if (ps.in_or_st) ps.block_init = 1;
387*35500Sbostic 	 */
388*35500Sbostic 	/* ?	code = ps.block_init ? lparen : lbrace; */
389*35500Sbostic 	code = lbrace;
390*35500Sbostic 	break;
3918804Smckusick 
392*35500Sbostic     case ('}'):
393*35500Sbostic 	unary_delim = true;
394*35500Sbostic 	/* ?	code = ps.block_init ? rparen : rbrace; */
395*35500Sbostic 	code = rbrace;
396*35500Sbostic 	break;
3978804Smckusick 
398*35500Sbostic     case 014:			/* a form feed */
399*35500Sbostic 	unary_delim = ps.last_u_d;
400*35500Sbostic 	ps.last_nl = true;	/* remember this so we can set 'ps.col_1'
40124455Smckusick 				 * right */
402*35500Sbostic 	code = form_feed;
403*35500Sbostic 	break;
4048804Smckusick 
405*35500Sbostic     case (','):
406*35500Sbostic 	unary_delim = true;
407*35500Sbostic 	code = comma;
408*35500Sbostic 	break;
4098804Smckusick 
410*35500Sbostic     case '.':
411*35500Sbostic 	unary_delim = false;
412*35500Sbostic 	code = period;
413*35500Sbostic 	break;
4148804Smckusick 
415*35500Sbostic     case '-':
416*35500Sbostic     case '+':			/* check for -, +, --, ++ */
417*35500Sbostic 	code = (ps.last_u_d ? unary_op : binary_op);
418*35500Sbostic 	unary_delim = true;
4198804Smckusick 
420*35500Sbostic 	if (*buf_ptr == token[0]) {
421*35500Sbostic 	    /* check for doubled character */
422*35500Sbostic 	    *tok++ = *buf_ptr++;
423*35500Sbostic 	    /* buffer overflow will be checked at end of loop */
424*35500Sbostic 	    if (last_code == ident || last_code == rparen) {
425*35500Sbostic 		code = (ps.last_u_d ? unary_op : postop);
426*35500Sbostic 		/* check for following ++ or -- */
427*35500Sbostic 		unary_delim = false;
4288804Smckusick 	    }
429*35500Sbostic 	}
430*35500Sbostic 	else if (*buf_ptr == '=')
431*35500Sbostic 	    /* check for operator += */
432*35500Sbostic 	    *tok++ = *buf_ptr++;
433*35500Sbostic 	else if (*buf_ptr == '>') {
434*35500Sbostic 	    /* check for operator -> */
435*35500Sbostic 	    *tok++ = *buf_ptr++;
436*35500Sbostic 	    if (!pointer_as_binop) {
437*35500Sbostic 		unary_delim = false;
438*35500Sbostic 		code = unary_op;
439*35500Sbostic 		ps.want_blank = false;
44024455Smckusick 	    }
441*35500Sbostic 	}
442*35500Sbostic 	break;			/* buffer overflow will be checked at end of
443*35500Sbostic 				 * switch */
4448804Smckusick 
445*35500Sbostic     case '=':
446*35500Sbostic 	if (ps.in_or_st)
447*35500Sbostic 	    ps.block_init = 1;
448*35500Sbostic #ifdef undef
449*35500Sbostic 	if (chartype[*buf_ptr] == opchar) {	/* we have two char assignment */
450*35500Sbostic 	    tok[-1] = *buf_ptr++;
451*35500Sbostic 	    if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr)
452*35500Sbostic 		*tok++ = *buf_ptr++;
453*35500Sbostic 	    *tok++ = '=';	/* Flip =+ to += */
454*35500Sbostic 	    *tok = 0;
455*35500Sbostic 	}
456*35500Sbostic #else
457*35500Sbostic 	if (*buf_ptr == '=') {/* == */
458*35500Sbostic 	    *tok++ = '=';	/* Flip =+ to += */
459*35500Sbostic 	    buf_ptr++;
460*35500Sbostic 	    *tok = 0;
461*35500Sbostic 	}
462*35500Sbostic #endif
463*35500Sbostic 	code = binary_op;
464*35500Sbostic 	unary_delim = true;
465*35500Sbostic 	break;
466*35500Sbostic 	/* can drop thru!!! */
4678804Smckusick 
468*35500Sbostic     case '>':
469*35500Sbostic     case '<':
470*35500Sbostic     case '!':			/* ops like <, <<, <=, !=, etc */
471*35500Sbostic 	if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
472*35500Sbostic 	    *tok++ = *buf_ptr;
473*35500Sbostic 	    if (++buf_ptr >= buf_end)
474*35500Sbostic 		fill_buffer();
475*35500Sbostic 	}
476*35500Sbostic 	if (*buf_ptr == '=')
477*35500Sbostic 	    *tok++ = *buf_ptr++;
478*35500Sbostic 	code = (ps.last_u_d ? unary_op : binary_op);
479*35500Sbostic 	unary_delim = true;
480*35500Sbostic 	break;
4818804Smckusick 
482*35500Sbostic     default:
483*35500Sbostic 	if (token[0] == '/' && *buf_ptr == '*') {
484*35500Sbostic 	    /* it is start of comment */
485*35500Sbostic 	    *tok++ = '*';
4868804Smckusick 
487*35500Sbostic 	    if (++buf_ptr >= buf_end)
488*35500Sbostic 		fill_buffer();
4898804Smckusick 
490*35500Sbostic 	    code = comment;
491*35500Sbostic 	    unary_delim = ps.last_u_d;
492*35500Sbostic 	    break;
493*35500Sbostic 	}
494*35500Sbostic 	while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') {
495*35500Sbostic 	    /*
496*35500Sbostic 	     * handle ||, &&, etc, and also things as in int *****i
497*35500Sbostic 	     */
498*35500Sbostic 	    *tok++ = *buf_ptr;
499*35500Sbostic 	    if (++buf_ptr >= buf_end)
500*35500Sbostic 		fill_buffer();
501*35500Sbostic 	}
502*35500Sbostic 	code = (ps.last_u_d ? unary_op : binary_op);
503*35500Sbostic 	unary_delim = true;
5048804Smckusick 
5058804Smckusick 
50624455Smckusick     }				/* end of switch */
5078804Smckusick     if (code != newline) {
5088804Smckusick 	l_struct = false;
5098804Smckusick 	last_code = code;
5108804Smckusick     }
51124455Smckusick     if (buf_ptr >= buf_end)	/* check for input buffer empty */
51224455Smckusick 	fill_buffer();
51324455Smckusick     ps.last_u_d = unary_delim;
51424455Smckusick     *tok = '\0';		/* null terminate the token */
5158804Smckusick     return (code);
5168804Smckusick };
51724455Smckusick 
518*35500Sbostic /*
519*35500Sbostic  * Add the given keyword to the keyword table, using val as the keyword type
520*35500Sbostic  */
521*35500Sbostic addkey(key, val)
522*35500Sbostic     char       *key;
52324455Smckusick {
52424455Smckusick     register struct templ *p = specials;
52524455Smckusick     while (p->rwd)
52624455Smckusick 	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
52724455Smckusick 	    return;
52824455Smckusick 	else
52924455Smckusick 	    p++;
53024455Smckusick     if (p >= specials + sizeof specials / sizeof specials[0])
53124455Smckusick 	return;			/* For now, table overflows are silently
532*35500Sbostic 				 * ignored */
53324455Smckusick     p->rwd = key;
53424455Smckusick     p->rwcode = val;
53524455Smckusick     p[1].rwd = 0;
53624455Smckusick     p[1].rwcode = 0;
53724455Smckusick     return;
53824455Smckusick }
539