xref: /csrg-svn/usr.bin/indent/lexi.c (revision 34885)
121970Sdist /*
221970Sdist  * Copyright (c) 1980 Regents of the University of California.
333767Sbostic  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
433767Sbostic  * All rights reserved.
533767Sbostic  *
633767Sbostic  * Redistribution and use in source and binary forms are permitted
7*34885Sbostic  * provided that the above copyright notice and this paragraph are
8*34885Sbostic  * duplicated in all such forms and that any documentation,
9*34885Sbostic  * advertising materials, and other materials related to such
10*34885Sbostic  * distribution and use acknowledge that the software was developed
11*34885Sbostic  * by the University of California, Berkeley and the University
12*34885Sbostic  * of Illinois, Urbana.  The name of either
13*34885Sbostic  * University may not be used to endorse or promote products derived
14*34885Sbostic  * from this software without specific prior written permission.
15*34885Sbostic  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
16*34885Sbostic  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
17*34885Sbostic  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
1821970Sdist  */
198804Smckusick 
2021970Sdist #ifndef lint
21*34885Sbostic static char sccsid[] = "@(#)lexi.c	5.8 (Berkeley) 06/29/88";
2233767Sbostic #endif /* not lint */
2321970Sdist 
2433767Sbostic /*
2524455Smckusick  * NAME:
2624455Smckusick  *	lexi
2724455Smckusick  *
2824455Smckusick  * FUNCTION:
2924455Smckusick  *	This is the token scanner for indent
3024455Smckusick  *
3124455Smckusick  * ALGORITHM:
3224455Smckusick  *	1) Strip off intervening blanks and/or tabs.
3324455Smckusick  *	2) If it is an alphanumeric token, move it to the token buffer "token".
3424455Smckusick  *	   Check if it is a special reserved word that indent will want to
3524455Smckusick  *	   know about.
3624455Smckusick  *	3) Non-alphanumeric tokens are handled with a big switch statement.  A
3724455Smckusick  *	   flag is kept to remember if the last token was a "unary delimiter",
3824455Smckusick  *	   which forces a following operator to be unary as opposed to binary.
3924455Smckusick  *
4024455Smckusick  * PARAMETERS:
4124455Smckusick  *	None
4224455Smckusick  *
4324455Smckusick  * RETURNS:
4424455Smckusick  *	An integer code indicating the type of token scanned.
4524455Smckusick  *
4624455Smckusick  * GLOBALS:
 *	buf_ptr =	Advanced past the token that was scanned
 *	had_eof		Read: a newline seen after end of input returns 0
 *	ps.last_u_d =	Set to true iff this token is a "unary delimiter"
5024455Smckusick  *
5124455Smckusick  * CALLS:
5224455Smckusick  *	fill_buffer
5324455Smckusick  *	printf (lib)
5424455Smckusick  *
5524455Smckusick  * CALLED BY:
5624455Smckusick  *	main
5724455Smckusick  *
5824455Smckusick  * NOTES:
5924455Smckusick  *	Start of comment is passed back so that the comment can be scanned by
6024455Smckusick  *	pr_comment.
6124455Smckusick  *
6224455Smckusick  *	Strings and character literals are returned just like identifiers.
6324455Smckusick  *
6424455Smckusick  * HISTORY:
6524455Smckusick  *	initial coding 	November 1976	D A Willcox of CAC
6624455Smckusick  *	1/7/77		D A Willcox of CAC	Fix to provide proper handling
6724455Smckusick  *						of "int a -1;"
6824455Smckusick  *
6924455Smckusick  */
7024455Smckusick 
718804Smckusick /*
7224455Smckusick  * Here we have the token scanner for indent.  It scans off one token and
7324455Smckusick  * puts it in the global variable "token".  It returns a code, indicating
7424455Smckusick  * the type of token scanned.
7524455Smckusick  */
768804Smckusick 
7733230Sbostic #include "indent_globs.h"
7833230Sbostic #include "indent_codes.h"
7924455Smckusick #include "ctype.h"
808804Smckusick 
/*
 * Character classes stored in the chartype[] table.  A table value of 0
 * means the character belongs to neither class.
 */
#define alphanum	1	/* letters, digits, '_' and '$' */
#define opchar		3	/* characters that can be part of an operator */
838804Smckusick 
/*
 * One entry of the reserved-word table: the spelling of the word and the
 * class code that lexi() switches on when it scans that word.
 */
struct templ {
    char       *rwd;		/* spelling of the reserved word */
    int         rwcode;		/* class code, see the table below */
};

/*
 * Reserved words known to the scanner.  The class codes, as interpreted by
 * lexi(), are:
 *	0	no special treatment (returned as an ordinary identifier)
 *	1	switch
 *	2	case, default
 *	3	struct, union, enum
 *	4	declaration keywords
 *	5	if, while, for
 *	6	else, do
 *	7	sizeof
 * The list ends at the first entry whose rwd is a null pointer.  The array
 * is larger than the initial list so that addkey() can append words.
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
1218804Smckusick 
/*
 * Character class of each 7-bit character code, indexed by the code itself:
 * 1 for characters that may appear in an identifier or number, 3 for
 * characters that may be part of an operator, 0 for everything else.
 * Each row below covers sixteen consecutive codes.
 */
char        chartype[128] =
{
    /* 000-017: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 020-037: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 040-057: blank and punctuation up to the slash */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 3, 3,
    /* 060-077: digits, then colon through question mark */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 100-117: at sign, upper case A to O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 120-137: upper case P to Z, brackets, caret, underscore */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 140-157: grave accent, lower case a to o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 160-177: lower case p to z, braces, bar, tilde, DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
1438804Smckusick 
1448804Smckusick 
1458804Smckusick 
1468804Smckusick 
14724455Smckusick int
14824455Smckusick lexi()
14924455Smckusick {
15024455Smckusick     register char *tok;		/* local pointer to next char in token */
15124455Smckusick     int         unary_delim;	/* this is set to 1 if the current token
15224455Smckusick 				 *
15324455Smckusick 				 * forces a following operator to be unary */
15424455Smckusick     static int  last_code;	/* the last token type returned */
15524455Smckusick     static int  l_struct;	/* set to 1 if the last token was 'struct' */
15624455Smckusick     int         code;		/* internal code to be returned */
15724455Smckusick     char        qchar;		/* the delimiter character for a string */
1588804Smckusick 
15924455Smckusick     tok = token;		/* point to start of place to save token */
1608804Smckusick     unary_delim = false;
16124455Smckusick     ps.col_1 = ps.last_nl;	/* tell world that this token started in
16224455Smckusick 				 * column 1 iff the last thing scanned was
16324455Smckusick 				 * nl */
16424455Smckusick     ps.last_nl = false;
1658804Smckusick 
16624455Smckusick     while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
16724455Smckusick 	ps.col_1 = false;	/* leading blanks imply token is not in
16824455Smckusick 				 * column 1 */
1698804Smckusick 	if (++buf_ptr >= buf_end)
17024455Smckusick 	    fill_buffer();
1718804Smckusick     }
1728804Smckusick 
17324649Smckusick     /* Scan an alphanumeric token.  Note that we must also handle
17424649Smckusick      * stuff like "1.0e+03" and "7e-6". */
17524455Smckusick     if (chartype[*buf_ptr & 0177] == alphanum) {	/* we have a character
17624455Smckusick 							 * or number */
17724455Smckusick 	register char *j;	/* used for searching thru list of
17824455Smckusick 				 * reserved words */
17924455Smckusick 	register struct templ *p;
18024649Smckusick 	register int c;
1818804Smckusick 
18224649Smckusick 	do {			/* copy it over */
1838804Smckusick 	    *tok++ = *buf_ptr++;
1848804Smckusick 	    if (buf_ptr >= buf_end)
18524455Smckusick 		fill_buffer();
18624649Smckusick 	} while (chartype[c = *buf_ptr & 0177] == alphanum ||
18724649Smckusick 		isdigit(token[0]) && (c == '+' || c == '-') &&
18824649Smckusick 		(tok[-1] == 'e' || tok[-1] == 'E'));
1898804Smckusick 	*tok++ = '\0';
19024455Smckusick 	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
19124455Smckusick 	    if (++buf_ptr >= buf_end)
19224455Smckusick 		fill_buffer();
19324455Smckusick 	}
19424455Smckusick 	ps.its_a_keyword = false;
19524455Smckusick 	ps.sizeof_keyword = false;
19624455Smckusick 	if (l_struct) {		/* if last token was 'struct', then this
19724455Smckusick 				 * token should be treated as a
19824455Smckusick 				 * declaration */
1998804Smckusick 	    l_struct = false;
2008804Smckusick 	    last_code = ident;
20124455Smckusick 	    ps.last_u_d = true;
2028804Smckusick 	    return (decl);
2038804Smckusick 	}
20424455Smckusick 	ps.last_u_d = false;	/* Operator after indentifier is binary */
20524455Smckusick 	last_code = ident;	/* Remember that this is the code we will
20624455Smckusick 				 * return */
2078804Smckusick 
20824455Smckusick 	/*
20924455Smckusick 	 * This loop will check if the token is a keyword.
21024455Smckusick 	 */
21124455Smckusick 	for (p = specials; (j = p->rwd) != 0; p++) {
21224455Smckusick 	    tok = token;	/* point at scanned token */
21324455Smckusick 	    if (*j++ != *tok++ || *j++ != *tok++)
21424455Smckusick 		continue;	/* This test depends on the fact that
21524455Smckusick 				 * identifiers are always at least 1
21624455Smckusick 				 * character long (ie. the first two bytes
21724455Smckusick 				 * of the identifier are always
21824455Smckusick 				 * meaningful) */
21924455Smckusick 	    if (tok[-1] == 0)
22024455Smckusick 		break;		/* If its a one-character identifier */
22124455Smckusick 	    while (*tok++ == *j)
22224455Smckusick 		if (*j++ == 0)
22324455Smckusick 		    goto found_keyword;	/* I wish that C had a multi-level
22424455Smckusick 					 * break... */
22524455Smckusick 	}
22624455Smckusick 	if (p->rwd) {		/* we have a keyword */
22724455Smckusick     found_keyword:
22824455Smckusick 	    ps.its_a_keyword = true;
22924455Smckusick 	    ps.last_u_d = true;
23024455Smckusick 	    switch (p->rwcode) {
23124455Smckusick 		case 1:	/* it is a switch */
23224455Smckusick 		    return (swstmt);
23324455Smckusick 		case 2:	/* a case or default */
23424455Smckusick 		    return (casestmt);
2358804Smckusick 
23624455Smckusick 		case 3:	/* a "struct" */
23724455Smckusick 		    if (ps.p_l_follow)
23824455Smckusick 			break;	/* inside parens: cast */
23924455Smckusick 		    l_struct = true;
2408804Smckusick 
24124455Smckusick 		    /*
24224455Smckusick 		     * Next time around, we will want to know that we have
24324455Smckusick 		     * had a 'struct'
24424455Smckusick 		     */
24524455Smckusick 		case 4:	/* one of the declaration keywords */
24624455Smckusick 		    if (ps.p_l_follow) {
24724455Smckusick 			ps.cast_mask |= 1 << ps.p_l_follow;
24824455Smckusick 			break;	/* inside parens: cast */
24924455Smckusick 		    }
25024455Smckusick 		    last_code = decl;
25124455Smckusick 		    return (decl);
2528804Smckusick 
25324455Smckusick 		case 5:	/* if, while, for */
25424455Smckusick 		    return (sp_paren);
2558804Smckusick 
25624455Smckusick 		case 6:	/* do, else */
25724455Smckusick 		    return (sp_nparen);
2588804Smckusick 
25924455Smckusick 		case 7:
26024455Smckusick 		    ps.sizeof_keyword = true;
26124455Smckusick 		default:	/* all others are treated like any other
26224455Smckusick 				 * identifier */
26324455Smckusick 		    return (ident);
26424455Smckusick 	    }			/* end of switch */
26524455Smckusick 	}			/* end of if (found_it) */
26624455Smckusick 	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0
26724455Smckusick 	    && (buf_ptr[1] != ')' || buf_ptr[2] != ';')) {
26824455Smckusick 	    strncpy(ps.procname, token, sizeof ps.procname - 1);
26924455Smckusick 	    ps.in_parameter_declaration = 1;
27024455Smckusick 	}
2718804Smckusick 
27224455Smckusick 	/*
27324455Smckusick 	 * The following hack attempts to guess whether or not the current
27424455Smckusick 	 * token is in fact a declaration keyword -- one that has been
27524455Smckusick 	 * typedefd
27624455Smckusick 	 */
27724455Smckusick 	if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr))
27824455Smckusick 	    && !ps.p_l_follow
27924455Smckusick 	    && (ps.last_token == rparen || ps.last_token == semicolon ||
28024455Smckusick 		ps.last_token == decl ||
28124455Smckusick 		ps.last_token == lbrace || ps.last_token == rbrace)) {
28224455Smckusick 	    ps.its_a_keyword = true;
28324455Smckusick 	    ps.last_u_d = true;
28424455Smckusick 	    last_code = decl;
28524455Smckusick 	    return decl;
2868804Smckusick 	}
28724455Smckusick 	if (last_code == decl)	/* if this is a declared variable, then
28824455Smckusick 				 * following sign is unary */
28924455Smckusick 	    ps.last_u_d = true;	/* will make "int a -1" work */
2908804Smckusick 	last_code = ident;
29124455Smckusick 	return (ident);		/* the ident is not in the list */
29224455Smckusick     }				/* end of procesing for alpanum character */
29324649Smckusick     /* Scan a non-alphanumeric token */
2948804Smckusick 
29524455Smckusick     *tok++ = *buf_ptr;		/* if it is only a one-character token, it
29624455Smckusick 				 * is moved here */
2978804Smckusick     *tok = '\0';
2988804Smckusick     if (++buf_ptr >= buf_end)
29924455Smckusick 	fill_buffer();
3008804Smckusick 
3018804Smckusick     switch (*token) {
30224455Smckusick 	case '\n':
30324455Smckusick 	    unary_delim = ps.last_u_d;
30424455Smckusick 	    ps.last_nl = true;	/* remember that we just had a newline */
3058804Smckusick 	    code = (had_eof ? 0 : newline);
30624455Smckusick 
30724455Smckusick 	    /*
30824455Smckusick 	     * if data has been exausted, the newline is a dummy, and we
30924455Smckusick 	     * should return code to stop
31024455Smckusick 	     */
3118804Smckusick 	    break;
3128804Smckusick 
31324455Smckusick 	case '\'':		/* start of quoted character */
31424455Smckusick 	case '"':		/* start of string */
31524455Smckusick 	    qchar = *token;
31624455Smckusick 	    if (troff) {
31724455Smckusick 		tok[-1] = '`';
31824455Smckusick 		if (qchar == '"')
31924455Smckusick 		    *tok++ = '`';
32024455Smckusick 		*tok++ = BACKSLASH;
32124455Smckusick 		*tok++ = 'f';
32224455Smckusick 		*tok++ = 'L';
32324455Smckusick 	    }
32424455Smckusick 	    do {		/* copy the string */
32524455Smckusick 		while (1) {	/* move one character or [/<char>]<char> */
3268804Smckusick 		    if (*buf_ptr == '\n') {
32724455Smckusick 			printf("%d: Unterminated literal\n", line_no);
3288804Smckusick 			goto stop_lit;
3298804Smckusick 		    }
3308804Smckusick 		    *tok = *buf_ptr++;
3318804Smckusick 		    if (buf_ptr >= buf_end)
33224455Smckusick 			fill_buffer();
3338804Smckusick 		    if (had_eof || ((tok - token) > (bufsize - 2))) {
33424455Smckusick 			printf("Unterminated literal\n");
3358804Smckusick 			++tok;
3368804Smckusick 			goto stop_lit;
33724455Smckusick 			/* get outof literal copying loop */
3388804Smckusick 		    }
33924455Smckusick 		    if (*tok == BACKSLASH) {	/* if escape, copy extra
34024455Smckusick 						 * char */
34124455Smckusick 			if (*buf_ptr == '\n')	/* check for escaped
34224455Smckusick 						 * newline */
3438804Smckusick 			    ++line_no;
34424455Smckusick 			if (troff) {
34524455Smckusick 			    *++tok = BACKSLASH;
34624455Smckusick 			    if (*buf_ptr == BACKSLASH)
34724455Smckusick 				*++tok = BACKSLASH;
34824455Smckusick 			}
34924455Smckusick 			*++tok = *buf_ptr++;
35024455Smckusick 			++tok;	/* we must increment this again because we
35124455Smckusick 				 * copied two chars */
3528804Smckusick 			if (buf_ptr >= buf_end)
35324455Smckusick 			    fill_buffer();
3548804Smckusick 		    }
3558804Smckusick 		    else
35624455Smckusick 			break;	/* we copied one character */
35724455Smckusick 		}		/* end of while (1) */
3588804Smckusick 	    } while (*tok++ != qchar);
35924455Smckusick 	    if (troff) {
36024455Smckusick 		tok[-1] = BACKSLASH;
36124455Smckusick 		*tok++ = 'f';
36224455Smckusick 		*tok++ = 'R';
36324455Smckusick 		*tok++ = '\'';
36424455Smckusick 		if (qchar == '"')
36524455Smckusick 		    *tok++ = '\'';
36624455Smckusick 	    }
36724455Smckusick     stop_lit:
3688804Smckusick 	    code = ident;
3698804Smckusick 	    break;
3708804Smckusick 
37124455Smckusick 	case ('('):
37224455Smckusick 	case ('['):
3738804Smckusick 	    unary_delim = true;
3748804Smckusick 	    code = lparen;
3758804Smckusick 	    break;
3768804Smckusick 
37724455Smckusick 	case (')'):
37824455Smckusick 	case (']'):
3798804Smckusick 	    code = rparen;
3808804Smckusick 	    break;
3818804Smckusick 
38224455Smckusick 	case '#':
38324455Smckusick 	    unary_delim = ps.last_u_d;
3848804Smckusick 	    code = preesc;
3858804Smckusick 	    break;
3868804Smckusick 
38724455Smckusick 	case '?':
3888804Smckusick 	    unary_delim = true;
3898804Smckusick 	    code = question;
3908804Smckusick 	    break;
3918804Smckusick 
39224455Smckusick 	case (':'):
3938804Smckusick 	    code = colon;
3948804Smckusick 	    unary_delim = true;
3958804Smckusick 	    break;
3968804Smckusick 
39724455Smckusick 	case (';'):
3988804Smckusick 	    unary_delim = true;
3998804Smckusick 	    code = semicolon;
4008804Smckusick 	    break;
4018804Smckusick 
40224455Smckusick 	case ('{'):
4038804Smckusick 	    unary_delim = true;
40424455Smckusick 
40524455Smckusick 	    /*
40624455Smckusick 	     * if (ps.in_or_st) ps.block_init = 1;
40724455Smckusick 	     */
40824455Smckusick 	    code = ps.block_init ? lparen : lbrace;
4098804Smckusick 	    break;
4108804Smckusick 
41124455Smckusick 	case ('}'):
4128804Smckusick 	    unary_delim = true;
41324455Smckusick 	    code = ps.block_init ? rparen : rbrace;
4148804Smckusick 	    break;
4158804Smckusick 
41624455Smckusick 	case 014:		/* a form feed */
41724455Smckusick 	    unary_delim = ps.last_u_d;
41824455Smckusick 	    ps.last_nl = true;	/* remember this so we can set 'ps.col_1'
41924455Smckusick 				 * right */
4208804Smckusick 	    code = form_feed;
4218804Smckusick 	    break;
4228804Smckusick 
42324455Smckusick 	case (','):
4248804Smckusick 	    unary_delim = true;
4258804Smckusick 	    code = comma;
4268804Smckusick 	    break;
4278804Smckusick 
42824455Smckusick 	case '.':
4298804Smckusick 	    unary_delim = false;
4308804Smckusick 	    code = period;
4318804Smckusick 	    break;
4328804Smckusick 
43324455Smckusick 	case '-':
43424455Smckusick 	case '+':		/* check for -, +, --, ++ */
43524455Smckusick 	    code = (ps.last_u_d ? unary_op : binary_op);
4368804Smckusick 	    unary_delim = true;
4378804Smckusick 
4388804Smckusick 	    if (*buf_ptr == token[0]) {
43924455Smckusick 		/* check for doubled character */
4408804Smckusick 		*tok++ = *buf_ptr++;
44124455Smckusick 		/* buffer overflow will be checked at end of loop */
4428804Smckusick 		if (last_code == ident || last_code == rparen) {
44324455Smckusick 		    code = (ps.last_u_d ? unary_op : postop);
44424455Smckusick 		    /* check for following ++ or -- */
4458804Smckusick 		    unary_delim = false;
4468804Smckusick 		}
4478804Smckusick 	    }
44824455Smckusick 	    else if (*buf_ptr == '=')
44924455Smckusick 		/* check for operator += */
45024455Smckusick 		*tok++ = *buf_ptr++;
45124677Smckusick 	    else if (token[0] == '-' && *buf_ptr == '>') {
45224455Smckusick 		/* check for operator -> */
45324455Smckusick 		*tok++ = *buf_ptr++;
45424677Smckusick 		if (!pointer_as_binop) {
45524677Smckusick 		    code = unary_op;
45624677Smckusick 		    unary_delim = false;
45724677Smckusick 		    ps.want_blank = false;
45824677Smckusick 		}
45924455Smckusick 	    }
46024455Smckusick 	    /* buffer overflow will be checked at end of switch */
4618804Smckusick 
4628804Smckusick 	    break;
4638804Smckusick 
46424455Smckusick 	case '=':
46524455Smckusick 	    if (ps.in_or_st)
46624455Smckusick 		ps.block_init = 1;
46724455Smckusick 	    if (chartype[*buf_ptr] == opchar) {	/* we have two char
46824455Smckusick 						 * assignment */
46924455Smckusick 		tok[-1] = *buf_ptr++;
47024455Smckusick 		if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr)
47124455Smckusick 		    *tok++ = *buf_ptr++;
47224455Smckusick 		*tok++ = '=';	/* Flip =+ to += */
47324455Smckusick 		*tok = 0;
4748804Smckusick 	    }
4758804Smckusick 	    code = binary_op;
4768804Smckusick 	    unary_delim = true;
47724455Smckusick 	    break;
47824455Smckusick 	    /* can drop thru!!! */
4798804Smckusick 
48024455Smckusick 	case '>':
48124455Smckusick 	case '<':
48224455Smckusick 	case '!':		/* ops like <, <<, <=, !=, etc */
4838804Smckusick 	    if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
4848804Smckusick 		*tok++ = *buf_ptr;
4858804Smckusick 		if (++buf_ptr >= buf_end)
48624455Smckusick 		    fill_buffer();
4878804Smckusick 	    }
4888804Smckusick 	    if (*buf_ptr == '=')
48924455Smckusick 		*tok++ = *buf_ptr++;
49024455Smckusick 	    code = (ps.last_u_d ? unary_op : binary_op);
4918804Smckusick 	    unary_delim = true;
4928804Smckusick 	    break;
4938804Smckusick 
49424455Smckusick 	default:
4958804Smckusick 	    if (token[0] == '/' && *buf_ptr == '*') {
49624455Smckusick 		/* it is start of comment */
4978804Smckusick 		*tok++ = '*';
4988804Smckusick 
4998804Smckusick 		if (++buf_ptr >= buf_end)
50024455Smckusick 		    fill_buffer();
5018804Smckusick 
5028804Smckusick 		code = comment;
50324455Smckusick 		unary_delim = ps.last_u_d;
5048804Smckusick 		break;
5058804Smckusick 	    }
50624455Smckusick 	    while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') {
50724455Smckusick 		/* handle ||, &&, etc, and also things as in int *****i */
5088804Smckusick 		*tok++ = *buf_ptr;
5098804Smckusick 		if (++buf_ptr >= buf_end)
51024455Smckusick 		    fill_buffer();
5118804Smckusick 	    }
51224455Smckusick 	    code = (ps.last_u_d ? unary_op : binary_op);
5138804Smckusick 	    unary_delim = true;
5148804Smckusick 
5158804Smckusick 
51624455Smckusick     }				/* end of switch */
5178804Smckusick     if (code != newline) {
5188804Smckusick 	l_struct = false;
5198804Smckusick 	last_code = code;
5208804Smckusick     }
52124455Smckusick     if (buf_ptr >= buf_end)	/* check for input buffer empty */
52224455Smckusick 	fill_buffer();
52324455Smckusick     ps.last_u_d = unary_delim;
52424455Smckusick     *tok = '\0';		/* null terminate the token */
5258804Smckusick     return (code);
5268804Smckusick };
52724455Smckusick 
52824455Smckusick /* Add the given keyword to the keyword table, using val as the keyword type
52924455Smckusick    */
53024455Smckusick addkey (key, val)
53124455Smckusick char       *key;
53224455Smckusick {
53324455Smckusick     register struct templ *p = specials;
53424455Smckusick     while (p->rwd)
53524455Smckusick 	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
53624455Smckusick 	    return;
53724455Smckusick 	else
53824455Smckusick 	    p++;
53924455Smckusick     if (p >= specials + sizeof specials / sizeof specials[0])
54024455Smckusick 	return;			/* For now, table overflows are silently
54124455Smckusick 				   ignored */
54224455Smckusick     p->rwd = key;
54324455Smckusick     p->rwcode = val;
54424455Smckusick     p[1].rwd = 0;
54524455Smckusick     p[1].rwcode = 0;
54624455Smckusick     return;
54724455Smckusick }
548