121970Sdist /* 235500Sbostic * Copyright (c) 1985 Sun Microsystems, Inc. 335500Sbostic * Copyright (c) 1980 The Regents of the University of California. 433767Sbostic * Copyright (c) 1976 Board of Trustees of the University of Illinois. 533767Sbostic * All rights reserved. 633767Sbostic * 7*42688Sbostic * %sccs.include.redist.c% 821970Sdist */ 98804Smckusick 1021970Sdist #ifndef lint 11*42688Sbostic static char sccsid[] = "@(#)lexi.c 5.15 (Berkeley) 06/01/90"; 1233767Sbostic #endif /* not lint */ 1321970Sdist 1433767Sbostic /* 1535500Sbostic * Here we have the token scanner for indent. It scans off one token and puts 1635500Sbostic * it in the global variable "token". It returns a code, indicating the type 1735500Sbostic * of token scanned. 1824455Smckusick */ 198804Smckusick 2035504Sbostic #include "indent_globs.h" 2135504Sbostic #include "indent_codes.h" 2236971Sbostic #include <ctype.h> 238804Smckusick 248804Smckusick #define alphanum 1 258804Smckusick #define opchar 3 268804Smckusick 278804Smckusick struct templ { 2824455Smckusick char *rwd; 2924455Smckusick int rwcode; 308804Smckusick }; 318804Smckusick 3224455Smckusick struct templ specials[100] = 338804Smckusick { 348804Smckusick "switch", 1, 358804Smckusick "case", 2, 3624455Smckusick "break", 0, 378804Smckusick "struct", 3, 3824455Smckusick "union", 3, 3924455Smckusick "enum", 3, 408804Smckusick "default", 2, 418804Smckusick "int", 4, 428804Smckusick "char", 4, 438804Smckusick "float", 4, 448804Smckusick "double", 4, 458804Smckusick "long", 4, 468804Smckusick "short", 4, 478804Smckusick "typdef", 4, 488804Smckusick "unsigned", 4, 498804Smckusick "register", 4, 508804Smckusick "static", 4, 518804Smckusick "global", 4, 528804Smckusick "extern", 4, 5324455Smckusick "void", 4, 5424455Smckusick "goto", 0, 5524455Smckusick "return", 0, 568804Smckusick "if", 5, 578804Smckusick "while", 5, 588804Smckusick "for", 5, 598804Smckusick "else", 6, 608804Smckusick "do", 6, 6124455Smckusick "sizeof", 7, 628804Smckusick 0, 0 638804Smckusick }; 648804Smckusick 6524455Smckusick char chartype[128] = 6635500Sbostic { /* this is used to facilitate the decision of 6735500Sbostic * what type (alphanumeric, operator) each 6835500Sbostic * character is */ 698804Smckusick 0, 0, 0, 0, 0, 0, 0, 0, 708804Smckusick 0, 0, 0, 0, 0, 0, 0, 0, 718804Smckusick 0, 0, 0, 0, 0, 0, 0, 0, 728804Smckusick 0, 0, 0, 0, 0, 0, 0, 0, 7333768Sbostic 0, 3, 0, 0, 1, 3, 3, 0, 7435500Sbostic 0, 0, 3, 3, 0, 3, 0, 3, 758804Smckusick 1, 1, 1, 1, 1, 1, 1, 1, 768804Smckusick 1, 1, 0, 0, 3, 3, 3, 3, 778804Smckusick 0, 1, 1, 1, 1, 1, 1, 1, 788804Smckusick 1, 1, 1, 1, 1, 1, 1, 1, 798804Smckusick 1, 1, 1, 1, 1, 1, 1, 1, 808804Smckusick 1, 1, 1, 0, 0, 0, 3, 1, 818804Smckusick 0, 1, 1, 1, 1, 1, 1, 1, 828804Smckusick 1, 1, 1, 1, 1, 1, 1, 1, 838804Smckusick 1, 1, 1, 1, 1, 1, 1, 1, 848804Smckusick 1, 1, 1, 0, 3, 0, 3, 0 858804Smckusick }; 868804Smckusick 878804Smckusick 888804Smckusick 898804Smckusick 9035500Sbostic int 9124455Smckusick lexi() 9224455Smckusick { 9335500Sbostic int unary_delim; /* this is set to 1 if the current token 9435500Sbostic * 9524455Smckusick * forces a following operator to be unary */ 9624455Smckusick static int last_code; /* the last token type returned */ 9724455Smckusick static int l_struct; /* set to 1 if the last token was 'struct' */ 9824455Smckusick int code; /* internal code to be returned */ 9924455Smckusick char qchar; /* the delimiter character for a string */ 1008804Smckusick 10138011Sbostic e_token = s_token; /* point to start of place to save token */ 1028804Smckusick unary_delim = false; 10324455Smckusick ps.col_1 = ps.last_nl; /* tell world that this token started in 10435500Sbostic * column 1 iff the last thing scanned was nl */ 10524455Smckusick ps.last_nl = false; 1068804Smckusick 10724455Smckusick while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 10835500Sbostic ps.col_1 = false; /* leading blanks imply token is not in column 10935500Sbostic * 1 */ 1108804Smckusick if (++buf_ptr >= buf_end) 11124455Smckusick fill_buffer(); 1128804Smckusick } 1138804Smckusick 11435500Sbostic /* Scan an alphanumeric token */ 11535500Sbostic if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 11635500Sbostic /* 11735500Sbostic * we have a character or number 11835500Sbostic */ 11935500Sbostic register char *j; /* used for searching thru list of 12035500Sbostic * 12124455Smckusick * reserved words */ 12224455Smckusick register struct templ *p; 1238804Smckusick 12435500Sbostic if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 12535500Sbostic int seendot = 0, 12635500Sbostic seenexp = 0; 12735500Sbostic if (*buf_ptr == '0' && 12835500Sbostic (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 12938011Sbostic *e_token++ = *buf_ptr++; 13038011Sbostic *e_token++ = *buf_ptr++; 13138011Sbostic while (isxdigit(*buf_ptr)) { 13240275Sbostic CHECK_SIZE_TOKEN; 13338011Sbostic *e_token++ = *buf_ptr++; 13438011Sbostic } 13535500Sbostic } 13635500Sbostic else 13735500Sbostic while (1) { 13835500Sbostic if (*buf_ptr == '.') 13935500Sbostic if (seendot) 14035500Sbostic break; 14135500Sbostic else 14235500Sbostic seendot++; 14340275Sbostic CHECK_SIZE_TOKEN; 14438011Sbostic *e_token++ = *buf_ptr++; 14535500Sbostic if (!isdigit(*buf_ptr) && *buf_ptr != '.') 14635500Sbostic if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 14735500Sbostic break; 14835500Sbostic else { 14935500Sbostic seenexp++; 15035500Sbostic seendot++; 15140275Sbostic CHECK_SIZE_TOKEN; 15238011Sbostic *e_token++ = *buf_ptr++; 15335500Sbostic if (*buf_ptr == '+' || *buf_ptr == '-') 15438011Sbostic *e_token++ = *buf_ptr++; 15535500Sbostic } 15635500Sbostic } 15735500Sbostic if (*buf_ptr == 'L' || *buf_ptr == 'l') 15838011Sbostic *e_token++ = *buf_ptr++; 15935500Sbostic } 16035500Sbostic else 16135500Sbostic while (chartype[*buf_ptr] == alphanum) { /* copy it over */ 16240275Sbostic CHECK_SIZE_TOKEN; 16338011Sbostic *e_token++ = *buf_ptr++; 16435500Sbostic if (buf_ptr >= buf_end) 16535500Sbostic fill_buffer(); 16635500Sbostic } 16738011Sbostic *e_token++ = '\0'; 16824455Smckusick while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 16924455Smckusick if (++buf_ptr >= buf_end) 17024455Smckusick fill_buffer(); 17124455Smckusick } 17224455Smckusick ps.its_a_keyword = false; 17324455Smckusick ps.sizeof_keyword = false; 17435500Sbostic if (l_struct) { /* if last token was 'struct', then this token 17535500Sbostic * should be treated as a declaration */ 1768804Smckusick l_struct = false; 1778804Smckusick last_code = ident; 17824455Smckusick ps.last_u_d = true; 1798804Smckusick return (decl); 1808804Smckusick } 18124455Smckusick ps.last_u_d = false; /* Operator after indentifier is binary */ 18224455Smckusick last_code = ident; /* Remember that this is the code we will 18324455Smckusick * return */ 1848804Smckusick 18524455Smckusick /* 18635500Sbostic * This loop will check if the token is a keyword. 18724455Smckusick */ 18824455Smckusick for (p = specials; (j = p->rwd) != 0; p++) { 18938011Sbostic register char *p = s_token; /* point at scanned token */ 19038011Sbostic if (*j++ != *p++ || *j++ != *p++) 19124455Smckusick continue; /* This test depends on the fact that 19235500Sbostic * identifiers are always at least 1 character 19335500Sbostic * long (ie. the first two bytes of the 19435500Sbostic * identifier are always meaningful) */ 19538011Sbostic if (p[-1] == 0) 19624455Smckusick break; /* If its a one-character identifier */ 19738011Sbostic while (*p++ == *j) 19824455Smckusick if (*j++ == 0) 19924455Smckusick goto found_keyword; /* I wish that C had a multi-level 20024455Smckusick * break... */ 20124455Smckusick } 20224455Smckusick if (p->rwd) { /* we have a keyword */ 20324455Smckusick found_keyword: 20424455Smckusick ps.its_a_keyword = true; 20524455Smckusick ps.last_u_d = true; 20624455Smckusick switch (p->rwcode) { 20735500Sbostic case 1: /* it is a switch */ 20835500Sbostic return (swstmt); 20935500Sbostic case 2: /* a case or default */ 21035500Sbostic return (casestmt); 2118804Smckusick 21235500Sbostic case 3: /* a "struct" */ 21335500Sbostic if (ps.p_l_follow) 21435500Sbostic break; /* inside parens: cast */ 21535500Sbostic l_struct = true; 2168804Smckusick 21735500Sbostic /* 21835500Sbostic * Next time around, we will want to know that we have had a 21935500Sbostic * 'struct' 22035500Sbostic */ 22135500Sbostic case 4: /* one of the declaration keywords */ 22235500Sbostic if (ps.p_l_follow) { 22335500Sbostic ps.cast_mask |= 1 << ps.p_l_follow; 22435500Sbostic break; /* inside parens: cast */ 22535500Sbostic } 22635500Sbostic last_code = decl; 22735500Sbostic return (decl); 2288804Smckusick 22935500Sbostic case 5: /* if, while, for */ 23035500Sbostic return (sp_paren); 2318804Smckusick 23235500Sbostic case 6: /* do, else */ 23335500Sbostic return (sp_nparen); 2348804Smckusick 23535500Sbostic case 7: 23635500Sbostic ps.sizeof_keyword = true; 23735500Sbostic default: /* all others are treated like any other 23824455Smckusick * identifier */ 23935500Sbostic return (ident); 24024455Smckusick } /* end of switch */ 24124455Smckusick } /* end of if (found_it) */ 24235500Sbostic if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 24335507Sbostic register char *tp = buf_ptr; 24435507Sbostic while (tp < buf_end) 24538011Sbostic if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 24635500Sbostic goto not_proc; 24724455Smckusick strncpy(ps.procname, token, sizeof ps.procname - 1); 24824455Smckusick ps.in_parameter_declaration = 1; 24938011Sbostic rparen_count = 1; 25035500Sbostic not_proc:; 25124455Smckusick } 25224455Smckusick /* 25324455Smckusick * The following hack attempts to guess whether or not the current 25424455Smckusick * token is in fact a declaration keyword -- one that has been 25535500Sbostic * typedefd 25624455Smckusick */ 25735500Sbostic if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 25835500Sbostic && !ps.p_l_follow 25935500Sbostic && !ps.block_init 26035500Sbostic && (ps.last_token == rparen || ps.last_token == semicolon || 26135500Sbostic ps.last_token == decl || 26235500Sbostic ps.last_token == lbrace || ps.last_token == rbrace)) { 26324455Smckusick ps.its_a_keyword = true; 26424455Smckusick ps.last_u_d = true; 26524455Smckusick last_code = decl; 26624455Smckusick return decl; 2678804Smckusick } 26824455Smckusick if (last_code == decl) /* if this is a declared variable, then 26924455Smckusick * following sign is unary */ 27024455Smckusick ps.last_u_d = true; /* will make "int a -1" work */ 2718804Smckusick last_code = ident; 27224455Smckusick return (ident); /* the ident is not in the list */ 27324455Smckusick } /* end of procesing for alpanum character */ 2748804Smckusick 27538011Sbostic /* Scan a non-alphanumeric token */ 27638011Sbostic 27738011Sbostic *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 27835500Sbostic * moved here */ 27938011Sbostic *e_token = '\0'; 2808804Smckusick if (++buf_ptr >= buf_end) 28124455Smckusick fill_buffer(); 2828804Smckusick 2838804Smckusick switch (*token) { 28435500Sbostic case '\n': 28535500Sbostic unary_delim = ps.last_u_d; 28635500Sbostic ps.last_nl = true; /* remember that we just had a newline */ 28735500Sbostic code = (had_eof ? 0 : newline); 28824455Smckusick 28935500Sbostic /* 29035500Sbostic * if data has been exausted, the newline is a dummy, and we should 29135500Sbostic * return code to stop 29235500Sbostic */ 29335500Sbostic break; 2948804Smckusick 29535500Sbostic case '\'': /* start of quoted character */ 29635500Sbostic case '"': /* start of string */ 29735500Sbostic qchar = *token; 29835500Sbostic if (troff) { 29938011Sbostic e_token[-1] = '`'; 30035500Sbostic if (qchar == '"') 30138011Sbostic *e_token++ = '`'; 30238011Sbostic e_token = chfont(&bodyf, &stringf, e_token); 30335500Sbostic } 30435500Sbostic do { /* copy the string */ 30535500Sbostic while (1) { /* move one character or [/<char>]<char> */ 30635500Sbostic if (*buf_ptr == '\n') { 30735500Sbostic printf("%d: Unterminated literal\n", line_no); 30835500Sbostic goto stop_lit; 30935500Sbostic } 31040275Sbostic CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 31140275Sbostic * since CHECK_SIZE guarantees that there 31238011Sbostic * are at least 5 entries left */ 31338011Sbostic *e_token = *buf_ptr++; 31435500Sbostic if (buf_ptr >= buf_end) 31535500Sbostic fill_buffer(); 31638011Sbostic if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 31735500Sbostic if (*buf_ptr == '\n') /* check for escaped newline */ 31835500Sbostic ++line_no; 31935500Sbostic if (troff) { 32038011Sbostic *++e_token = BACKSLASH; 32135500Sbostic if (*buf_ptr == BACKSLASH) 32238011Sbostic *++e_token = BACKSLASH; 3238804Smckusick } 32438011Sbostic *++e_token = *buf_ptr++; 32538011Sbostic ++e_token; /* we must increment this again because we 32635500Sbostic * copied two chars */ 3278804Smckusick if (buf_ptr >= buf_end) 32824455Smckusick fill_buffer(); 32935500Sbostic } 33035500Sbostic else 33135500Sbostic break; /* we copied one character */ 33235500Sbostic } /* end of while (1) */ 33338011Sbostic } while (*e_token++ != qchar); 33435500Sbostic if (troff) { 33538011Sbostic e_token = chfont(&stringf, &bodyf, e_token - 1); 33635500Sbostic if (qchar == '"') 33738011Sbostic *e_token++ = '\''; 33835500Sbostic } 33935500Sbostic stop_lit: 34035500Sbostic code = ident; 34135500Sbostic break; 3428804Smckusick 34335500Sbostic case ('('): 34435500Sbostic case ('['): 34535500Sbostic unary_delim = true; 34635500Sbostic code = lparen; 34735500Sbostic break; 3488804Smckusick 34935500Sbostic case (')'): 35035500Sbostic case (']'): 35135500Sbostic code = rparen; 35235500Sbostic break; 3538804Smckusick 35435500Sbostic case '#': 35535500Sbostic unary_delim = ps.last_u_d; 35635500Sbostic code = preesc; 35735500Sbostic break; 3588804Smckusick 35935500Sbostic case '?': 36035500Sbostic unary_delim = true; 36135500Sbostic code = question; 36235500Sbostic break; 3638804Smckusick 36435500Sbostic case (':'): 36535500Sbostic code = colon; 36635500Sbostic unary_delim = true; 36735500Sbostic break; 3688804Smckusick 36935500Sbostic case (';'): 37035500Sbostic unary_delim = true; 37135500Sbostic code = semicolon; 37235500Sbostic break; 3738804Smckusick 37435500Sbostic case ('{'): 37535500Sbostic unary_delim = true; 37624455Smckusick 37735500Sbostic /* 37835500Sbostic * if (ps.in_or_st) ps.block_init = 1; 37935500Sbostic */ 38035500Sbostic /* ? code = ps.block_init ? lparen : lbrace; */ 38135500Sbostic code = lbrace; 38235500Sbostic break; 3838804Smckusick 38435500Sbostic case ('}'): 38535500Sbostic unary_delim = true; 38635500Sbostic /* ? code = ps.block_init ? rparen : rbrace; */ 38735500Sbostic code = rbrace; 38835500Sbostic break; 3898804Smckusick 39035500Sbostic case 014: /* a form feed */ 39135500Sbostic unary_delim = ps.last_u_d; 39235500Sbostic ps.last_nl = true; /* remember this so we can set 'ps.col_1' 39324455Smckusick * right */ 39435500Sbostic code = form_feed; 39535500Sbostic break; 3968804Smckusick 39735500Sbostic case (','): 39835500Sbostic unary_delim = true; 39935500Sbostic code = comma; 40035500Sbostic break; 4018804Smckusick 40235500Sbostic case '.': 40335500Sbostic unary_delim = false; 40435500Sbostic code = period; 40535500Sbostic break; 4068804Smckusick 40735500Sbostic case '-': 40835500Sbostic case '+': /* check for -, +, --, ++ */ 40935500Sbostic code = (ps.last_u_d ? unary_op : binary_op); 41035500Sbostic unary_delim = true; 4118804Smckusick 41235500Sbostic if (*buf_ptr == token[0]) { 41335500Sbostic /* check for doubled character */ 41438011Sbostic *e_token++ = *buf_ptr++; 41535500Sbostic /* buffer overflow will be checked at end of loop */ 41635500Sbostic if (last_code == ident || last_code == rparen) { 41735500Sbostic code = (ps.last_u_d ? unary_op : postop); 41835500Sbostic /* check for following ++ or -- */ 41935500Sbostic unary_delim = false; 4208804Smckusick } 42135500Sbostic } 42235500Sbostic else if (*buf_ptr == '=') 42335500Sbostic /* check for operator += */ 42438011Sbostic *e_token++ = *buf_ptr++; 42535500Sbostic else if (*buf_ptr == '>') { 42635500Sbostic /* check for operator -> */ 42738011Sbostic *e_token++ = *buf_ptr++; 42835500Sbostic if (!pointer_as_binop) { 42935500Sbostic unary_delim = false; 43035500Sbostic code = unary_op; 43135500Sbostic ps.want_blank = false; 43224455Smckusick } 43335500Sbostic } 43435500Sbostic break; /* buffer overflow will be checked at end of 43535500Sbostic * switch */ 4368804Smckusick 43735500Sbostic case '=': 43835500Sbostic if (ps.in_or_st) 43935500Sbostic ps.block_init = 1; 44035500Sbostic #ifdef undef 44135500Sbostic if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 44238011Sbostic e_token[-1] = *buf_ptr++; 44338011Sbostic if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 44438011Sbostic *e_token++ = *buf_ptr++; 44538011Sbostic *e_token++ = '='; /* Flip =+ to += */ 44638011Sbostic *e_token = 0; 44735500Sbostic } 44835500Sbostic #else 44935500Sbostic if (*buf_ptr == '=') {/* == */ 45038011Sbostic *e_token++ = '='; /* Flip =+ to += */ 45135500Sbostic buf_ptr++; 45238011Sbostic *e_token = 0; 45335500Sbostic } 45435500Sbostic #endif 45535500Sbostic code = binary_op; 45635500Sbostic unary_delim = true; 45735500Sbostic break; 45835500Sbostic /* can drop thru!!! */ 4598804Smckusick 46035500Sbostic case '>': 46135500Sbostic case '<': 46235500Sbostic case '!': /* ops like <, <<, <=, !=, etc */ 46335500Sbostic if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 46438011Sbostic *e_token++ = *buf_ptr; 46535500Sbostic if (++buf_ptr >= buf_end) 46635500Sbostic fill_buffer(); 46735500Sbostic } 46835500Sbostic if (*buf_ptr == '=') 46938011Sbostic *e_token++ = *buf_ptr++; 47035500Sbostic code = (ps.last_u_d ? unary_op : binary_op); 47135500Sbostic unary_delim = true; 47235500Sbostic break; 4738804Smckusick 47435500Sbostic default: 47535500Sbostic if (token[0] == '/' && *buf_ptr == '*') { 47635500Sbostic /* it is start of comment */ 47738011Sbostic *e_token++ = '*'; 4788804Smckusick 47935500Sbostic if (++buf_ptr >= buf_end) 48035500Sbostic fill_buffer(); 4818804Smckusick 48235500Sbostic code = comment; 48335500Sbostic unary_delim = ps.last_u_d; 48435500Sbostic break; 48535500Sbostic } 48638011Sbostic while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 48735500Sbostic /* 48835500Sbostic * handle ||, &&, etc, and also things as in int *****i 48935500Sbostic */ 49038011Sbostic *e_token++ = *buf_ptr; 49135500Sbostic if (++buf_ptr >= buf_end) 49235500Sbostic fill_buffer(); 49335500Sbostic } 49435500Sbostic code = (ps.last_u_d ? unary_op : binary_op); 49535500Sbostic unary_delim = true; 4968804Smckusick 4978804Smckusick 49824455Smckusick } /* end of switch */ 4998804Smckusick if (code != newline) { 5008804Smckusick l_struct = false; 5018804Smckusick last_code = code; 5028804Smckusick } 50324455Smckusick if (buf_ptr >= buf_end) /* check for input buffer empty */ 50424455Smckusick fill_buffer(); 50524455Smckusick ps.last_u_d = unary_delim; 50638011Sbostic *e_token = '\0'; /* null terminate the token */ 5078804Smckusick return (code); 50836971Sbostic } 50924455Smckusick 51035500Sbostic /* 51135500Sbostic * Add the given keyword to the keyword table, using val as the keyword type 51235500Sbostic */ 51335500Sbostic addkey(key, val) 51435500Sbostic char *key; 51524455Smckusick { 51624455Smckusick register struct templ *p = specials; 51724455Smckusick while (p->rwd) 51824455Smckusick if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 51924455Smckusick return; 52024455Smckusick else 52124455Smckusick p++; 52224455Smckusick if (p >= specials + sizeof specials / sizeof specials[0]) 52324455Smckusick return; /* For now, table overflows are silently 52435500Sbostic * ignored */ 52524455Smckusick p->rwd = key; 52624455Smckusick p->rwcode = val; 52724455Smckusick p[1].rwd = 0; 52824455Smckusick p[1].rwcode = 0; 52924455Smckusick return; 53024455Smckusick } 531