121970Sdist /* 235500Sbostic * Copyright (c) 1985 Sun Microsystems, Inc. 335500Sbostic * Copyright (c) 1980 The Regents of the University of California. 433767Sbostic * Copyright (c) 1976 Board of Trustees of the University of Illinois. 533767Sbostic * All rights reserved. 633767Sbostic * 742688Sbostic * %sccs.include.redist.c% 821970Sdist */ 98804Smckusick 1021970Sdist #ifndef lint 11*46695Sbostic static char sccsid[] = "@(#)lexi.c 5.16 (Berkeley) 02/26/91"; 1233767Sbostic #endif /* not lint */ 1321970Sdist 1433767Sbostic /* 1535500Sbostic * Here we have the token scanner for indent. It scans off one token and puts 1635500Sbostic * it in the global variable "token". It returns a code, indicating the type 1735500Sbostic * of token scanned. 1824455Smckusick */ 198804Smckusick 20*46695Sbostic #include <stdio.h> 21*46695Sbostic #include <ctype.h> 22*46695Sbostic #include <stdlib.h> 23*46695Sbostic #include <string.h> 2435504Sbostic #include "indent_globs.h" 2535504Sbostic #include "indent_codes.h" 268804Smckusick 278804Smckusick #define alphanum 1 288804Smckusick #define opchar 3 298804Smckusick 308804Smckusick struct templ { 3124455Smckusick char *rwd; 3224455Smckusick int rwcode; 338804Smckusick }; 348804Smckusick 3524455Smckusick struct templ specials[100] = 368804Smckusick { 378804Smckusick "switch", 1, 388804Smckusick "case", 2, 3924455Smckusick "break", 0, 408804Smckusick "struct", 3, 4124455Smckusick "union", 3, 4224455Smckusick "enum", 3, 438804Smckusick "default", 2, 448804Smckusick "int", 4, 458804Smckusick "char", 4, 468804Smckusick "float", 4, 478804Smckusick "double", 4, 488804Smckusick "long", 4, 498804Smckusick "short", 4, 508804Smckusick "typdef", 4, 518804Smckusick "unsigned", 4, 528804Smckusick "register", 4, 538804Smckusick "static", 4, 548804Smckusick "global", 4, 558804Smckusick "extern", 4, 5624455Smckusick "void", 4, 5724455Smckusick "goto", 0, 5824455Smckusick "return", 0, 598804Smckusick "if", 5, 608804Smckusick "while", 5, 618804Smckusick "for", 5, 628804Smckusick "else", 6, 638804Smckusick "do", 6, 6424455Smckusick "sizeof", 7, 658804Smckusick 0, 0 668804Smckusick }; 678804Smckusick 6824455Smckusick char chartype[128] = 6935500Sbostic { /* this is used to facilitate the decision of 7035500Sbostic * what type (alphanumeric, operator) each 7135500Sbostic * character is */ 728804Smckusick 0, 0, 0, 0, 0, 0, 0, 0, 738804Smckusick 0, 0, 0, 0, 0, 0, 0, 0, 748804Smckusick 0, 0, 0, 0, 0, 0, 0, 0, 758804Smckusick 0, 0, 0, 0, 0, 0, 0, 0, 7633768Sbostic 0, 3, 0, 0, 1, 3, 3, 0, 7735500Sbostic 0, 0, 3, 3, 0, 3, 0, 3, 788804Smckusick 1, 1, 1, 1, 1, 1, 1, 1, 798804Smckusick 1, 1, 0, 0, 3, 3, 3, 3, 808804Smckusick 0, 1, 1, 1, 1, 1, 1, 1, 818804Smckusick 1, 1, 1, 1, 1, 1, 1, 1, 828804Smckusick 1, 1, 1, 1, 1, 1, 1, 1, 838804Smckusick 1, 1, 1, 0, 0, 0, 3, 1, 848804Smckusick 0, 1, 1, 1, 1, 1, 1, 1, 858804Smckusick 1, 1, 1, 1, 1, 1, 1, 1, 868804Smckusick 1, 1, 1, 1, 1, 1, 1, 1, 878804Smckusick 1, 1, 1, 0, 3, 0, 3, 0 888804Smckusick }; 898804Smckusick 908804Smckusick 918804Smckusick 928804Smckusick 9335500Sbostic int 9424455Smckusick lexi() 9524455Smckusick { 9635500Sbostic int unary_delim; /* this is set to 1 if the current token 9735500Sbostic * 9824455Smckusick * forces a following operator to be unary */ 9924455Smckusick static int last_code; /* the last token type returned */ 10024455Smckusick static int l_struct; /* set to 1 if the last token was 'struct' */ 10124455Smckusick int code; /* internal code to be returned */ 10224455Smckusick char qchar; /* the delimiter character for a string */ 1038804Smckusick 10438011Sbostic e_token = s_token; /* point to start of place to save token */ 1058804Smckusick unary_delim = false; 10624455Smckusick ps.col_1 = ps.last_nl; /* tell world that this token started in 10735500Sbostic * column 1 iff the last thing scanned was nl */ 10824455Smckusick ps.last_nl = false; 1098804Smckusick 11024455Smckusick while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 11135500Sbostic ps.col_1 = false; /* leading blanks imply token is not in column 11235500Sbostic * 1 */ 1138804Smckusick if (++buf_ptr >= buf_end) 11424455Smckusick fill_buffer(); 1158804Smckusick } 1168804Smckusick 11735500Sbostic /* Scan an alphanumeric token */ 11835500Sbostic if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 11935500Sbostic /* 12035500Sbostic * we have a character or number 12135500Sbostic */ 12235500Sbostic register char *j; /* used for searching thru list of 12335500Sbostic * 12424455Smckusick * reserved words */ 12524455Smckusick register struct templ *p; 1268804Smckusick 12735500Sbostic if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 12835500Sbostic int seendot = 0, 12935500Sbostic seenexp = 0; 13035500Sbostic if (*buf_ptr == '0' && 13135500Sbostic (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 13238011Sbostic *e_token++ = *buf_ptr++; 13338011Sbostic *e_token++ = *buf_ptr++; 13438011Sbostic while (isxdigit(*buf_ptr)) { 13540275Sbostic CHECK_SIZE_TOKEN; 13638011Sbostic *e_token++ = *buf_ptr++; 13738011Sbostic } 13835500Sbostic } 13935500Sbostic else 14035500Sbostic while (1) { 14135500Sbostic if (*buf_ptr == '.') 14235500Sbostic if (seendot) 14335500Sbostic break; 14435500Sbostic else 14535500Sbostic seendot++; 14640275Sbostic CHECK_SIZE_TOKEN; 14738011Sbostic *e_token++ = *buf_ptr++; 14835500Sbostic if (!isdigit(*buf_ptr) && *buf_ptr != '.') 14935500Sbostic if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 15035500Sbostic break; 15135500Sbostic else { 15235500Sbostic seenexp++; 15335500Sbostic seendot++; 15440275Sbostic CHECK_SIZE_TOKEN; 15538011Sbostic *e_token++ = *buf_ptr++; 15635500Sbostic if (*buf_ptr == '+' || *buf_ptr == '-') 15738011Sbostic *e_token++ = *buf_ptr++; 15835500Sbostic } 15935500Sbostic } 16035500Sbostic if (*buf_ptr == 'L' || *buf_ptr == 'l') 16138011Sbostic *e_token++ = *buf_ptr++; 16235500Sbostic } 16335500Sbostic else 16435500Sbostic while (chartype[*buf_ptr] == alphanum) { /* copy it over */ 16540275Sbostic CHECK_SIZE_TOKEN; 16638011Sbostic *e_token++ = *buf_ptr++; 16735500Sbostic if (buf_ptr >= buf_end) 16835500Sbostic fill_buffer(); 16935500Sbostic } 17038011Sbostic *e_token++ = '\0'; 17124455Smckusick while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 17224455Smckusick if (++buf_ptr >= buf_end) 17324455Smckusick fill_buffer(); 17424455Smckusick } 17524455Smckusick ps.its_a_keyword = false; 17624455Smckusick ps.sizeof_keyword = false; 17735500Sbostic if (l_struct) { /* if last token was 'struct', then this token 17835500Sbostic * should be treated as a declaration */ 1798804Smckusick l_struct = false; 1808804Smckusick last_code = ident; 18124455Smckusick ps.last_u_d = true; 1828804Smckusick return (decl); 1838804Smckusick } 18424455Smckusick ps.last_u_d = false; /* Operator after indentifier is binary */ 18524455Smckusick last_code = ident; /* Remember that this is the code we will 18624455Smckusick * return */ 1878804Smckusick 18824455Smckusick /* 18935500Sbostic * This loop will check if the token is a keyword. 19024455Smckusick */ 19124455Smckusick for (p = specials; (j = p->rwd) != 0; p++) { 19238011Sbostic register char *p = s_token; /* point at scanned token */ 19338011Sbostic if (*j++ != *p++ || *j++ != *p++) 19424455Smckusick continue; /* This test depends on the fact that 19535500Sbostic * identifiers are always at least 1 character 19635500Sbostic * long (ie. the first two bytes of the 19735500Sbostic * identifier are always meaningful) */ 19838011Sbostic if (p[-1] == 0) 19924455Smckusick break; /* If its a one-character identifier */ 20038011Sbostic while (*p++ == *j) 20124455Smckusick if (*j++ == 0) 20224455Smckusick goto found_keyword; /* I wish that C had a multi-level 20324455Smckusick * break... */ 20424455Smckusick } 20524455Smckusick if (p->rwd) { /* we have a keyword */ 20624455Smckusick found_keyword: 20724455Smckusick ps.its_a_keyword = true; 20824455Smckusick ps.last_u_d = true; 20924455Smckusick switch (p->rwcode) { 21035500Sbostic case 1: /* it is a switch */ 21135500Sbostic return (swstmt); 21235500Sbostic case 2: /* a case or default */ 21335500Sbostic return (casestmt); 2148804Smckusick 21535500Sbostic case 3: /* a "struct" */ 21635500Sbostic if (ps.p_l_follow) 21735500Sbostic break; /* inside parens: cast */ 21835500Sbostic l_struct = true; 2198804Smckusick 22035500Sbostic /* 22135500Sbostic * Next time around, we will want to know that we have had a 22235500Sbostic * 'struct' 22335500Sbostic */ 22435500Sbostic case 4: /* one of the declaration keywords */ 22535500Sbostic if (ps.p_l_follow) { 22635500Sbostic ps.cast_mask |= 1 << ps.p_l_follow; 22735500Sbostic break; /* inside parens: cast */ 22835500Sbostic } 22935500Sbostic last_code = decl; 23035500Sbostic return (decl); 2318804Smckusick 23235500Sbostic case 5: /* if, while, for */ 23335500Sbostic return (sp_paren); 2348804Smckusick 23535500Sbostic case 6: /* do, else */ 23635500Sbostic return (sp_nparen); 2378804Smckusick 23835500Sbostic case 7: 23935500Sbostic ps.sizeof_keyword = true; 24035500Sbostic default: /* all others are treated like any other 24124455Smckusick * identifier */ 24235500Sbostic return (ident); 24324455Smckusick } /* end of switch */ 24424455Smckusick } /* end of if (found_it) */ 24535500Sbostic if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 24635507Sbostic register char *tp = buf_ptr; 24735507Sbostic while (tp < buf_end) 24838011Sbostic if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 24935500Sbostic goto not_proc; 25024455Smckusick strncpy(ps.procname, token, sizeof ps.procname - 1); 25124455Smckusick ps.in_parameter_declaration = 1; 25238011Sbostic rparen_count = 1; 25335500Sbostic not_proc:; 25424455Smckusick } 25524455Smckusick /* 25624455Smckusick * The following hack attempts to guess whether or not the current 25724455Smckusick * token is in fact a declaration keyword -- one that has been 25835500Sbostic * typedefd 25924455Smckusick */ 26035500Sbostic if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 26135500Sbostic && !ps.p_l_follow 26235500Sbostic && !ps.block_init 26335500Sbostic && (ps.last_token == rparen || ps.last_token == semicolon || 26435500Sbostic ps.last_token == decl || 26535500Sbostic ps.last_token == lbrace || ps.last_token == rbrace)) { 26624455Smckusick ps.its_a_keyword = true; 26724455Smckusick ps.last_u_d = true; 26824455Smckusick last_code = decl; 26924455Smckusick return decl; 2708804Smckusick } 27124455Smckusick if (last_code == decl) /* if this is a declared variable, then 27224455Smckusick * following sign is unary */ 27324455Smckusick ps.last_u_d = true; /* will make "int a -1" work */ 2748804Smckusick last_code = ident; 27524455Smckusick return (ident); /* the ident is not in the list */ 27624455Smckusick } /* end of procesing for alpanum character */ 2778804Smckusick 27838011Sbostic /* Scan a non-alphanumeric token */ 27938011Sbostic 28038011Sbostic *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 28135500Sbostic * moved here */ 28238011Sbostic *e_token = '\0'; 2838804Smckusick if (++buf_ptr >= buf_end) 28424455Smckusick fill_buffer(); 2858804Smckusick 2868804Smckusick switch (*token) { 28735500Sbostic case '\n': 28835500Sbostic unary_delim = ps.last_u_d; 28935500Sbostic ps.last_nl = true; /* remember that we just had a newline */ 29035500Sbostic code = (had_eof ? 0 : newline); 29124455Smckusick 29235500Sbostic /* 29335500Sbostic * if data has been exausted, the newline is a dummy, and we should 29435500Sbostic * return code to stop 29535500Sbostic */ 29635500Sbostic break; 2978804Smckusick 29835500Sbostic case '\'': /* start of quoted character */ 29935500Sbostic case '"': /* start of string */ 30035500Sbostic qchar = *token; 30135500Sbostic if (troff) { 30238011Sbostic e_token[-1] = '`'; 30335500Sbostic if (qchar == '"') 30438011Sbostic *e_token++ = '`'; 30538011Sbostic e_token = chfont(&bodyf, &stringf, e_token); 30635500Sbostic } 30735500Sbostic do { /* copy the string */ 30835500Sbostic while (1) { /* move one character or [/<char>]<char> */ 30935500Sbostic if (*buf_ptr == '\n') { 31035500Sbostic printf("%d: Unterminated literal\n", line_no); 31135500Sbostic goto stop_lit; 31235500Sbostic } 31340275Sbostic CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 31440275Sbostic * since CHECK_SIZE guarantees that there 31538011Sbostic * are at least 5 entries left */ 31638011Sbostic *e_token = *buf_ptr++; 31735500Sbostic if (buf_ptr >= buf_end) 31835500Sbostic fill_buffer(); 31938011Sbostic if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 32035500Sbostic if (*buf_ptr == '\n') /* check for escaped newline */ 32135500Sbostic ++line_no; 32235500Sbostic if (troff) { 32338011Sbostic *++e_token = BACKSLASH; 32435500Sbostic if (*buf_ptr == BACKSLASH) 32538011Sbostic *++e_token = BACKSLASH; 3268804Smckusick } 32738011Sbostic *++e_token = *buf_ptr++; 32838011Sbostic ++e_token; /* we must increment this again because we 32935500Sbostic * copied two chars */ 3308804Smckusick if (buf_ptr >= buf_end) 33124455Smckusick fill_buffer(); 33235500Sbostic } 33335500Sbostic else 33435500Sbostic break; /* we copied one character */ 33535500Sbostic } /* end of while (1) */ 33638011Sbostic } while (*e_token++ != qchar); 33735500Sbostic if (troff) { 33838011Sbostic e_token = chfont(&stringf, &bodyf, e_token - 1); 33935500Sbostic if (qchar == '"') 34038011Sbostic *e_token++ = '\''; 34135500Sbostic } 34235500Sbostic stop_lit: 34335500Sbostic code = ident; 34435500Sbostic break; 3458804Smckusick 34635500Sbostic case ('('): 34735500Sbostic case ('['): 34835500Sbostic unary_delim = true; 34935500Sbostic code = lparen; 35035500Sbostic break; 3518804Smckusick 35235500Sbostic case (')'): 35335500Sbostic case (']'): 35435500Sbostic code = rparen; 35535500Sbostic break; 3568804Smckusick 35735500Sbostic case '#': 35835500Sbostic unary_delim = ps.last_u_d; 35935500Sbostic code = preesc; 36035500Sbostic break; 3618804Smckusick 36235500Sbostic case '?': 36335500Sbostic unary_delim = true; 36435500Sbostic code = question; 36535500Sbostic break; 3668804Smckusick 36735500Sbostic case (':'): 36835500Sbostic code = colon; 36935500Sbostic unary_delim = true; 37035500Sbostic break; 3718804Smckusick 37235500Sbostic case (';'): 37335500Sbostic unary_delim = true; 37435500Sbostic code = semicolon; 37535500Sbostic break; 3768804Smckusick 37735500Sbostic case ('{'): 37835500Sbostic unary_delim = true; 37924455Smckusick 38035500Sbostic /* 38135500Sbostic * if (ps.in_or_st) ps.block_init = 1; 38235500Sbostic */ 38335500Sbostic /* ? code = ps.block_init ? lparen : lbrace; */ 38435500Sbostic code = lbrace; 38535500Sbostic break; 3868804Smckusick 38735500Sbostic case ('}'): 38835500Sbostic unary_delim = true; 38935500Sbostic /* ? code = ps.block_init ? rparen : rbrace; */ 39035500Sbostic code = rbrace; 39135500Sbostic break; 3928804Smckusick 39335500Sbostic case 014: /* a form feed */ 39435500Sbostic unary_delim = ps.last_u_d; 39535500Sbostic ps.last_nl = true; /* remember this so we can set 'ps.col_1' 39624455Smckusick * right */ 39735500Sbostic code = form_feed; 39835500Sbostic break; 3998804Smckusick 40035500Sbostic case (','): 40135500Sbostic unary_delim = true; 40235500Sbostic code = comma; 40335500Sbostic break; 4048804Smckusick 40535500Sbostic case '.': 40635500Sbostic unary_delim = false; 40735500Sbostic code = period; 40835500Sbostic break; 4098804Smckusick 41035500Sbostic case '-': 41135500Sbostic case '+': /* check for -, +, --, ++ */ 41235500Sbostic code = (ps.last_u_d ? unary_op : binary_op); 41335500Sbostic unary_delim = true; 4148804Smckusick 41535500Sbostic if (*buf_ptr == token[0]) { 41635500Sbostic /* check for doubled character */ 41738011Sbostic *e_token++ = *buf_ptr++; 41835500Sbostic /* buffer overflow will be checked at end of loop */ 41935500Sbostic if (last_code == ident || last_code == rparen) { 42035500Sbostic code = (ps.last_u_d ? unary_op : postop); 42135500Sbostic /* check for following ++ or -- */ 42235500Sbostic unary_delim = false; 4238804Smckusick } 42435500Sbostic } 42535500Sbostic else if (*buf_ptr == '=') 42635500Sbostic /* check for operator += */ 42738011Sbostic *e_token++ = *buf_ptr++; 42835500Sbostic else if (*buf_ptr == '>') { 42935500Sbostic /* check for operator -> */ 43038011Sbostic *e_token++ = *buf_ptr++; 43135500Sbostic if (!pointer_as_binop) { 43235500Sbostic unary_delim = false; 43335500Sbostic code = unary_op; 43435500Sbostic ps.want_blank = false; 43524455Smckusick } 43635500Sbostic } 43735500Sbostic break; /* buffer overflow will be checked at end of 43835500Sbostic * switch */ 4398804Smckusick 44035500Sbostic case '=': 44135500Sbostic if (ps.in_or_st) 44235500Sbostic ps.block_init = 1; 44335500Sbostic #ifdef undef 44435500Sbostic if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 44538011Sbostic e_token[-1] = *buf_ptr++; 44638011Sbostic if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 44738011Sbostic *e_token++ = *buf_ptr++; 44838011Sbostic *e_token++ = '='; /* Flip =+ to += */ 44938011Sbostic *e_token = 0; 45035500Sbostic } 45135500Sbostic #else 45235500Sbostic if (*buf_ptr == '=') {/* == */ 45338011Sbostic *e_token++ = '='; /* Flip =+ to += */ 45435500Sbostic buf_ptr++; 45538011Sbostic *e_token = 0; 45635500Sbostic } 45735500Sbostic #endif 45835500Sbostic code = binary_op; 45935500Sbostic unary_delim = true; 46035500Sbostic break; 46135500Sbostic /* can drop thru!!! */ 4628804Smckusick 46335500Sbostic case '>': 46435500Sbostic case '<': 46535500Sbostic case '!': /* ops like <, <<, <=, !=, etc */ 46635500Sbostic if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 46738011Sbostic *e_token++ = *buf_ptr; 46835500Sbostic if (++buf_ptr >= buf_end) 46935500Sbostic fill_buffer(); 47035500Sbostic } 47135500Sbostic if (*buf_ptr == '=') 47238011Sbostic *e_token++ = *buf_ptr++; 47335500Sbostic code = (ps.last_u_d ? unary_op : binary_op); 47435500Sbostic unary_delim = true; 47535500Sbostic break; 4768804Smckusick 47735500Sbostic default: 47835500Sbostic if (token[0] == '/' && *buf_ptr == '*') { 47935500Sbostic /* it is start of comment */ 48038011Sbostic *e_token++ = '*'; 4818804Smckusick 48235500Sbostic if (++buf_ptr >= buf_end) 48335500Sbostic fill_buffer(); 4848804Smckusick 48535500Sbostic code = comment; 48635500Sbostic unary_delim = ps.last_u_d; 48735500Sbostic break; 48835500Sbostic } 48938011Sbostic while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 49035500Sbostic /* 49135500Sbostic * handle ||, &&, etc, and also things as in int *****i 49235500Sbostic */ 49338011Sbostic *e_token++ = *buf_ptr; 49435500Sbostic if (++buf_ptr >= buf_end) 49535500Sbostic fill_buffer(); 49635500Sbostic } 49735500Sbostic code = (ps.last_u_d ? unary_op : binary_op); 49835500Sbostic unary_delim = true; 4998804Smckusick 5008804Smckusick 50124455Smckusick } /* end of switch */ 5028804Smckusick if (code != newline) { 5038804Smckusick l_struct = false; 5048804Smckusick last_code = code; 5058804Smckusick } 50624455Smckusick if (buf_ptr >= buf_end) /* check for input buffer empty */ 50724455Smckusick fill_buffer(); 50824455Smckusick ps.last_u_d = unary_delim; 50938011Sbostic *e_token = '\0'; /* null terminate the token */ 5108804Smckusick return (code); 51136971Sbostic } 51224455Smckusick 51335500Sbostic /* 51435500Sbostic * Add the given keyword to the keyword table, using val as the keyword type 51535500Sbostic */ 51635500Sbostic addkey(key, val) 51735500Sbostic char *key; 51824455Smckusick { 51924455Smckusick register struct templ *p = specials; 52024455Smckusick while (p->rwd) 52124455Smckusick if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 52224455Smckusick return; 52324455Smckusick else 52424455Smckusick p++; 52524455Smckusick if (p >= specials + sizeof specials / sizeof specials[0]) 52624455Smckusick return; /* For now, table overflows are silently 52735500Sbostic * ignored */ 52824455Smckusick p->rwd = key; 52924455Smckusick p->rwcode = val; 53024455Smckusick p[1].rwd = 0; 53124455Smckusick p[1].rwcode = 0; 53224455Smckusick return; 53324455Smckusick } 534