121970Sdist /*
235500Sbostic * Copyright (c) 1985 Sun Microsystems, Inc.
3*62040Sbostic * Copyright (c) 1980, 1993
4*62040Sbostic * The Regents of the University of California. All rights reserved.
533767Sbostic * All rights reserved.
633767Sbostic *
742688Sbostic * %sccs.include.redist.c%
821970Sdist */
98804Smckusick
#ifndef lint
/* Version identification string for this file; omitted when "lint" is defined. */
static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 06/06/93";
#endif /* not lint */
1321970Sdist
/*
 * Here we have the token scanner for indent.  It scans off one token and puts
 * it in the global variable "token".  It returns a code indicating the type
 * of token scanned.
 */
198804Smckusick
2046695Sbostic #include <stdio.h>
2146695Sbostic #include <ctype.h>
2246695Sbostic #include <stdlib.h>
2346695Sbostic #include <string.h>
2435504Sbostic #include "indent_globs.h"
2535504Sbostic #include "indent_codes.h"
268804Smckusick
/* Character classes stored in chartype[] and tested by lexi(). */
#define alphanum 1		/* letters, digits, '_' and '$' in chartype[] */
#define opchar 3		/* operator characters in chartype[] */
298804Smckusick
/*
 * One entry of the reserved-word table.
 *
 * rwd     the keyword text (a null pointer marks the end of the table)
 * rwcode  selects how lexi() treats the word:
 *           0  no special handling; returned as an ordinary identifier
 *           1  "switch"
 *           2  "case" / "default"
 *           3  "struct", "union", "enum"
 *           4  declaration keywords (types and storage classes)
 *           5  keywords followed by a parenthesised expression (if/while/for)
 *           6  keywords not followed by a parenthesis (else/do)
 *           7  "sizeof"
 */
struct templ {
    char       *rwd;
    int         rwcode;
};

/*
 * Table of reserved words.  The initial entries are listed here; addkey()
 * appends further entries at run time, which is why the array is larger than
 * its initialiser.  The table is terminated by an entry whose rwd is null.
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef", so never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
678804Smckusick
/*
 * Character classification table, indexed by 7-bit ASCII code.  It is used to
 * decide what type (alphanumeric, operator) each character is:
 *   1 (alphanum)  letters, digits, '_' and '$'
 *   3 (opchar)    ! % & * + - / < = > ? ^ | ~
 *   0             everything else
 * Each row below covers sixteen consecutive codes.
 */
char chartype[128] =
{
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space ! " # $ % & ' ( ) * + , - . / */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0x30 - 0x3f: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: @ A B C D E F G H I J K L M N O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: P Q R S T U V W X Y Z [ \ ] ^ _ */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: ` a b c d e f g h i j k l m n o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: p q r s t u v w x y z { | } ~ DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
898804Smckusick
908804Smckusick
918804Smckusick
928804Smckusick
9335500Sbostic int
lexi()9424455Smckusick lexi()
9524455Smckusick {
9635500Sbostic int unary_delim; /* this is set to 1 if the current token
9735500Sbostic *
9824455Smckusick * forces a following operator to be unary */
9924455Smckusick static int last_code; /* the last token type returned */
10024455Smckusick static int l_struct; /* set to 1 if the last token was 'struct' */
10124455Smckusick int code; /* internal code to be returned */
10224455Smckusick char qchar; /* the delimiter character for a string */
1038804Smckusick
10438011Sbostic e_token = s_token; /* point to start of place to save token */
1058804Smckusick unary_delim = false;
10624455Smckusick ps.col_1 = ps.last_nl; /* tell world that this token started in
10735500Sbostic * column 1 iff the last thing scanned was nl */
10824455Smckusick ps.last_nl = false;
1098804Smckusick
11024455Smckusick while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
11135500Sbostic ps.col_1 = false; /* leading blanks imply token is not in column
11235500Sbostic * 1 */
1138804Smckusick if (++buf_ptr >= buf_end)
11424455Smckusick fill_buffer();
1158804Smckusick }
1168804Smckusick
11735500Sbostic /* Scan an alphanumeric token */
11835500Sbostic if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
11935500Sbostic /*
12035500Sbostic * we have a character or number
12135500Sbostic */
12235500Sbostic register char *j; /* used for searching thru list of
12335500Sbostic *
12424455Smckusick * reserved words */
12524455Smckusick register struct templ *p;
1268804Smckusick
12735500Sbostic if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
12835500Sbostic int seendot = 0,
12935500Sbostic seenexp = 0;
13035500Sbostic if (*buf_ptr == '0' &&
13135500Sbostic (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
13238011Sbostic *e_token++ = *buf_ptr++;
13338011Sbostic *e_token++ = *buf_ptr++;
13438011Sbostic while (isxdigit(*buf_ptr)) {
13540275Sbostic CHECK_SIZE_TOKEN;
13638011Sbostic *e_token++ = *buf_ptr++;
13738011Sbostic }
13835500Sbostic }
13935500Sbostic else
14035500Sbostic while (1) {
14135500Sbostic if (*buf_ptr == '.')
14235500Sbostic if (seendot)
14335500Sbostic break;
14435500Sbostic else
14535500Sbostic seendot++;
14640275Sbostic CHECK_SIZE_TOKEN;
14738011Sbostic *e_token++ = *buf_ptr++;
14835500Sbostic if (!isdigit(*buf_ptr) && *buf_ptr != '.')
14935500Sbostic if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
15035500Sbostic break;
15135500Sbostic else {
15235500Sbostic seenexp++;
15335500Sbostic seendot++;
15440275Sbostic CHECK_SIZE_TOKEN;
15538011Sbostic *e_token++ = *buf_ptr++;
15635500Sbostic if (*buf_ptr == '+' || *buf_ptr == '-')
15738011Sbostic *e_token++ = *buf_ptr++;
15835500Sbostic }
15935500Sbostic }
16035500Sbostic if (*buf_ptr == 'L' || *buf_ptr == 'l')
16138011Sbostic *e_token++ = *buf_ptr++;
16235500Sbostic }
16335500Sbostic else
16435500Sbostic while (chartype[*buf_ptr] == alphanum) { /* copy it over */
16540275Sbostic CHECK_SIZE_TOKEN;
16638011Sbostic *e_token++ = *buf_ptr++;
16735500Sbostic if (buf_ptr >= buf_end)
16835500Sbostic fill_buffer();
16935500Sbostic }
17038011Sbostic *e_token++ = '\0';
17124455Smckusick while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
17224455Smckusick if (++buf_ptr >= buf_end)
17324455Smckusick fill_buffer();
17424455Smckusick }
17524455Smckusick ps.its_a_keyword = false;
17624455Smckusick ps.sizeof_keyword = false;
17735500Sbostic if (l_struct) { /* if last token was 'struct', then this token
17835500Sbostic * should be treated as a declaration */
1798804Smckusick l_struct = false;
1808804Smckusick last_code = ident;
18124455Smckusick ps.last_u_d = true;
1828804Smckusick return (decl);
1838804Smckusick }
18424455Smckusick ps.last_u_d = false; /* Operator after indentifier is binary */
18524455Smckusick last_code = ident; /* Remember that this is the code we will
18624455Smckusick * return */
1878804Smckusick
18824455Smckusick /*
18935500Sbostic * This loop will check if the token is a keyword.
19024455Smckusick */
19124455Smckusick for (p = specials; (j = p->rwd) != 0; p++) {
19238011Sbostic register char *p = s_token; /* point at scanned token */
19338011Sbostic if (*j++ != *p++ || *j++ != *p++)
19424455Smckusick continue; /* This test depends on the fact that
19535500Sbostic * identifiers are always at least 1 character
19635500Sbostic * long (ie. the first two bytes of the
19735500Sbostic * identifier are always meaningful) */
19838011Sbostic if (p[-1] == 0)
19924455Smckusick break; /* If its a one-character identifier */
20038011Sbostic while (*p++ == *j)
20124455Smckusick if (*j++ == 0)
20224455Smckusick goto found_keyword; /* I wish that C had a multi-level
20324455Smckusick * break... */
20424455Smckusick }
20524455Smckusick if (p->rwd) { /* we have a keyword */
20624455Smckusick found_keyword:
20724455Smckusick ps.its_a_keyword = true;
20824455Smckusick ps.last_u_d = true;
20924455Smckusick switch (p->rwcode) {
21035500Sbostic case 1: /* it is a switch */
21135500Sbostic return (swstmt);
21235500Sbostic case 2: /* a case or default */
21335500Sbostic return (casestmt);
2148804Smckusick
21535500Sbostic case 3: /* a "struct" */
21635500Sbostic if (ps.p_l_follow)
21735500Sbostic break; /* inside parens: cast */
21835500Sbostic l_struct = true;
2198804Smckusick
22035500Sbostic /*
22135500Sbostic * Next time around, we will want to know that we have had a
22235500Sbostic * 'struct'
22335500Sbostic */
22435500Sbostic case 4: /* one of the declaration keywords */
22535500Sbostic if (ps.p_l_follow) {
22635500Sbostic ps.cast_mask |= 1 << ps.p_l_follow;
22735500Sbostic break; /* inside parens: cast */
22835500Sbostic }
22935500Sbostic last_code = decl;
23035500Sbostic return (decl);
2318804Smckusick
23235500Sbostic case 5: /* if, while, for */
23335500Sbostic return (sp_paren);
2348804Smckusick
23535500Sbostic case 6: /* do, else */
23635500Sbostic return (sp_nparen);
2378804Smckusick
23835500Sbostic case 7:
23935500Sbostic ps.sizeof_keyword = true;
24035500Sbostic default: /* all others are treated like any other
24124455Smckusick * identifier */
24235500Sbostic return (ident);
24324455Smckusick } /* end of switch */
24424455Smckusick } /* end of if (found_it) */
24535500Sbostic if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
24635507Sbostic register char *tp = buf_ptr;
24735507Sbostic while (tp < buf_end)
24838011Sbostic if (*tp++ == ')' && (*tp == ';' || *tp == ','))
24935500Sbostic goto not_proc;
25024455Smckusick strncpy(ps.procname, token, sizeof ps.procname - 1);
25124455Smckusick ps.in_parameter_declaration = 1;
25238011Sbostic rparen_count = 1;
25335500Sbostic not_proc:;
25424455Smckusick }
25524455Smckusick /*
25624455Smckusick * The following hack attempts to guess whether or not the current
25724455Smckusick * token is in fact a declaration keyword -- one that has been
25835500Sbostic * typedefd
25924455Smckusick */
26035500Sbostic if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
26135500Sbostic && !ps.p_l_follow
26235500Sbostic && !ps.block_init
26335500Sbostic && (ps.last_token == rparen || ps.last_token == semicolon ||
26435500Sbostic ps.last_token == decl ||
26535500Sbostic ps.last_token == lbrace || ps.last_token == rbrace)) {
26624455Smckusick ps.its_a_keyword = true;
26724455Smckusick ps.last_u_d = true;
26824455Smckusick last_code = decl;
26924455Smckusick return decl;
2708804Smckusick }
27124455Smckusick if (last_code == decl) /* if this is a declared variable, then
27224455Smckusick * following sign is unary */
27324455Smckusick ps.last_u_d = true; /* will make "int a -1" work */
2748804Smckusick last_code = ident;
27524455Smckusick return (ident); /* the ident is not in the list */
27624455Smckusick } /* end of procesing for alpanum character */
2778804Smckusick
27838011Sbostic /* Scan a non-alphanumeric token */
27938011Sbostic
28038011Sbostic *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
28135500Sbostic * moved here */
28238011Sbostic *e_token = '\0';
2838804Smckusick if (++buf_ptr >= buf_end)
28424455Smckusick fill_buffer();
2858804Smckusick
2868804Smckusick switch (*token) {
28735500Sbostic case '\n':
28835500Sbostic unary_delim = ps.last_u_d;
28935500Sbostic ps.last_nl = true; /* remember that we just had a newline */
29035500Sbostic code = (had_eof ? 0 : newline);
29124455Smckusick
29235500Sbostic /*
29335500Sbostic * if data has been exausted, the newline is a dummy, and we should
29435500Sbostic * return code to stop
29535500Sbostic */
29635500Sbostic break;
2978804Smckusick
29835500Sbostic case '\'': /* start of quoted character */
29935500Sbostic case '"': /* start of string */
30035500Sbostic qchar = *token;
30135500Sbostic if (troff) {
30238011Sbostic e_token[-1] = '`';
30335500Sbostic if (qchar == '"')
30438011Sbostic *e_token++ = '`';
30538011Sbostic e_token = chfont(&bodyf, &stringf, e_token);
30635500Sbostic }
30735500Sbostic do { /* copy the string */
30835500Sbostic while (1) { /* move one character or [/<char>]<char> */
30935500Sbostic if (*buf_ptr == '\n') {
31035500Sbostic printf("%d: Unterminated literal\n", line_no);
31135500Sbostic goto stop_lit;
31235500Sbostic }
31340275Sbostic CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
31440275Sbostic * since CHECK_SIZE guarantees that there
31538011Sbostic * are at least 5 entries left */
31638011Sbostic *e_token = *buf_ptr++;
31735500Sbostic if (buf_ptr >= buf_end)
31835500Sbostic fill_buffer();
31938011Sbostic if (*e_token == BACKSLASH) { /* if escape, copy extra char */
32035500Sbostic if (*buf_ptr == '\n') /* check for escaped newline */
32135500Sbostic ++line_no;
32235500Sbostic if (troff) {
32338011Sbostic *++e_token = BACKSLASH;
32435500Sbostic if (*buf_ptr == BACKSLASH)
32538011Sbostic *++e_token = BACKSLASH;
3268804Smckusick }
32738011Sbostic *++e_token = *buf_ptr++;
32838011Sbostic ++e_token; /* we must increment this again because we
32935500Sbostic * copied two chars */
3308804Smckusick if (buf_ptr >= buf_end)
33124455Smckusick fill_buffer();
33235500Sbostic }
33335500Sbostic else
33435500Sbostic break; /* we copied one character */
33535500Sbostic } /* end of while (1) */
33638011Sbostic } while (*e_token++ != qchar);
33735500Sbostic if (troff) {
33838011Sbostic e_token = chfont(&stringf, &bodyf, e_token - 1);
33935500Sbostic if (qchar == '"')
34038011Sbostic *e_token++ = '\'';
34135500Sbostic }
34235500Sbostic stop_lit:
34335500Sbostic code = ident;
34435500Sbostic break;
3458804Smckusick
34635500Sbostic case ('('):
34735500Sbostic case ('['):
34835500Sbostic unary_delim = true;
34935500Sbostic code = lparen;
35035500Sbostic break;
3518804Smckusick
35235500Sbostic case (')'):
35335500Sbostic case (']'):
35435500Sbostic code = rparen;
35535500Sbostic break;
3568804Smckusick
35735500Sbostic case '#':
35835500Sbostic unary_delim = ps.last_u_d;
35935500Sbostic code = preesc;
36035500Sbostic break;
3618804Smckusick
36235500Sbostic case '?':
36335500Sbostic unary_delim = true;
36435500Sbostic code = question;
36535500Sbostic break;
3668804Smckusick
36735500Sbostic case (':'):
36835500Sbostic code = colon;
36935500Sbostic unary_delim = true;
37035500Sbostic break;
3718804Smckusick
37235500Sbostic case (';'):
37335500Sbostic unary_delim = true;
37435500Sbostic code = semicolon;
37535500Sbostic break;
3768804Smckusick
37735500Sbostic case ('{'):
37835500Sbostic unary_delim = true;
37924455Smckusick
38035500Sbostic /*
38135500Sbostic * if (ps.in_or_st) ps.block_init = 1;
38235500Sbostic */
38335500Sbostic /* ? code = ps.block_init ? lparen : lbrace; */
38435500Sbostic code = lbrace;
38535500Sbostic break;
3868804Smckusick
38735500Sbostic case ('}'):
38835500Sbostic unary_delim = true;
38935500Sbostic /* ? code = ps.block_init ? rparen : rbrace; */
39035500Sbostic code = rbrace;
39135500Sbostic break;
3928804Smckusick
39335500Sbostic case 014: /* a form feed */
39435500Sbostic unary_delim = ps.last_u_d;
39535500Sbostic ps.last_nl = true; /* remember this so we can set 'ps.col_1'
39624455Smckusick * right */
39735500Sbostic code = form_feed;
39835500Sbostic break;
3998804Smckusick
40035500Sbostic case (','):
40135500Sbostic unary_delim = true;
40235500Sbostic code = comma;
40335500Sbostic break;
4048804Smckusick
40535500Sbostic case '.':
40635500Sbostic unary_delim = false;
40735500Sbostic code = period;
40835500Sbostic break;
4098804Smckusick
41035500Sbostic case '-':
41135500Sbostic case '+': /* check for -, +, --, ++ */
41235500Sbostic code = (ps.last_u_d ? unary_op : binary_op);
41335500Sbostic unary_delim = true;
4148804Smckusick
41535500Sbostic if (*buf_ptr == token[0]) {
41635500Sbostic /* check for doubled character */
41738011Sbostic *e_token++ = *buf_ptr++;
41835500Sbostic /* buffer overflow will be checked at end of loop */
41935500Sbostic if (last_code == ident || last_code == rparen) {
42035500Sbostic code = (ps.last_u_d ? unary_op : postop);
42135500Sbostic /* check for following ++ or -- */
42235500Sbostic unary_delim = false;
4238804Smckusick }
42435500Sbostic }
42535500Sbostic else if (*buf_ptr == '=')
42635500Sbostic /* check for operator += */
42738011Sbostic *e_token++ = *buf_ptr++;
42835500Sbostic else if (*buf_ptr == '>') {
42935500Sbostic /* check for operator -> */
43038011Sbostic *e_token++ = *buf_ptr++;
43135500Sbostic if (!pointer_as_binop) {
43235500Sbostic unary_delim = false;
43335500Sbostic code = unary_op;
43435500Sbostic ps.want_blank = false;
43524455Smckusick }
43635500Sbostic }
43735500Sbostic break; /* buffer overflow will be checked at end of
43835500Sbostic * switch */
4398804Smckusick
44035500Sbostic case '=':
44135500Sbostic if (ps.in_or_st)
44235500Sbostic ps.block_init = 1;
44335500Sbostic #ifdef undef
44435500Sbostic if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
44538011Sbostic e_token[-1] = *buf_ptr++;
44638011Sbostic if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
44738011Sbostic *e_token++ = *buf_ptr++;
44838011Sbostic *e_token++ = '='; /* Flip =+ to += */
44938011Sbostic *e_token = 0;
45035500Sbostic }
45135500Sbostic #else
45235500Sbostic if (*buf_ptr == '=') {/* == */
45338011Sbostic *e_token++ = '='; /* Flip =+ to += */
45435500Sbostic buf_ptr++;
45538011Sbostic *e_token = 0;
45635500Sbostic }
45735500Sbostic #endif
45835500Sbostic code = binary_op;
45935500Sbostic unary_delim = true;
46035500Sbostic break;
46135500Sbostic /* can drop thru!!! */
4628804Smckusick
46335500Sbostic case '>':
46435500Sbostic case '<':
46535500Sbostic case '!': /* ops like <, <<, <=, !=, etc */
46635500Sbostic if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
46738011Sbostic *e_token++ = *buf_ptr;
46835500Sbostic if (++buf_ptr >= buf_end)
46935500Sbostic fill_buffer();
47035500Sbostic }
47135500Sbostic if (*buf_ptr == '=')
47238011Sbostic *e_token++ = *buf_ptr++;
47335500Sbostic code = (ps.last_u_d ? unary_op : binary_op);
47435500Sbostic unary_delim = true;
47535500Sbostic break;
4768804Smckusick
47735500Sbostic default:
47835500Sbostic if (token[0] == '/' && *buf_ptr == '*') {
47935500Sbostic /* it is start of comment */
48038011Sbostic *e_token++ = '*';
4818804Smckusick
48235500Sbostic if (++buf_ptr >= buf_end)
48335500Sbostic fill_buffer();
4848804Smckusick
48535500Sbostic code = comment;
48635500Sbostic unary_delim = ps.last_u_d;
48735500Sbostic break;
48835500Sbostic }
48938011Sbostic while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
49035500Sbostic /*
49135500Sbostic * handle ||, &&, etc, and also things as in int *****i
49235500Sbostic */
49338011Sbostic *e_token++ = *buf_ptr;
49435500Sbostic if (++buf_ptr >= buf_end)
49535500Sbostic fill_buffer();
49635500Sbostic }
49735500Sbostic code = (ps.last_u_d ? unary_op : binary_op);
49835500Sbostic unary_delim = true;
4998804Smckusick
5008804Smckusick
50124455Smckusick } /* end of switch */
5028804Smckusick if (code != newline) {
5038804Smckusick l_struct = false;
5048804Smckusick last_code = code;
5058804Smckusick }
50624455Smckusick if (buf_ptr >= buf_end) /* check for input buffer empty */
50724455Smckusick fill_buffer();
50824455Smckusick ps.last_u_d = unary_delim;
50938011Sbostic *e_token = '\0'; /* null terminate the token */
5108804Smckusick return (code);
51136971Sbostic }
51224455Smckusick
51335500Sbostic /*
51435500Sbostic * Add the given keyword to the keyword table, using val as the keyword type
51535500Sbostic */
addkey(key,val)51635500Sbostic addkey(key, val)
51735500Sbostic char *key;
51824455Smckusick {
51924455Smckusick register struct templ *p = specials;
52024455Smckusick while (p->rwd)
52124455Smckusick if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
52224455Smckusick return;
52324455Smckusick else
52424455Smckusick p++;
52524455Smckusick if (p >= specials + sizeof specials / sizeof specials[0])
52624455Smckusick return; /* For now, table overflows are silently
52735500Sbostic * ignored */
52824455Smckusick p->rwd = key;
52924455Smckusick p->rwcode = val;
53024455Smckusick p[1].rwd = 0;
53124455Smckusick p[1].rwcode = 0;
53224455Smckusick return;
53324455Smckusick }
534