/*-
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.proprietary.c%
 */

%X str chc reg comment

%{
#ifndef lint
static char sccsid[] = "@(#)awk.lx.l 4.6 (Berkeley) 01/28/94";
#endif /* not lint */

/*
 * Lexical analyzer for awk (flex input).
 *
 * Exclusive start conditions used below:
 *   str      inside a double-quoted string literal; text is gathered in cbuf
 *   chc      inside a [...] character class of a regular expression
 *   reg      inside a regular expression; entered only through startreg()
 *   comment  after a '#' that did not start its line; runs to the newline
 */

#include <string.h>
#include "awk.h"
#include "awk.def"
extern int yylval;
extern int mustfld;
extern int ldbg;
extern char *lexprog;

/*
 * Input source for the scanner.  When lexprog is non-NULL the program text
 * is taken from that string, at most max_size bytes per call, and lexprog
 * is advanced past what was consumed; an exhausted string yields result 0,
 * which flex treats as end of input.  Otherwise input is read from yyin.
 *
 * Exactly `result' bytes are copied with memcpy, so nothing is written past
 * the max_size bytes flex asked for.  A failing read() is reported through
 * YY_FATAL_ERROR (as flex's default YY_INPUT does) instead of handing a
 * negative byte count back to the scanner.
 */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) \
        { \
        if ( lexprog ) \
                { \
                size_t awk_avail = strlen( lexprog ); \
                if ( awk_avail > (size_t) (max_size) ) \
                        awk_avail = (size_t) (max_size); \
                memcpy( buf, lexprog, awk_avail ); \
                lexprog += awk_avail; \
                result = awk_avail; \
                } \
        else \
                { \
                int awk_nread = read( fileno(yyin), buf, max_size ); \
                if ( awk_nread < 0 ) \
                        YY_FATAL_ERROR( "read() in awk scanner failed" ); \
                result = awk_nread; \
                } \
        }

int lineno = 1;         /* current input line, bumped on every newline seen */

/* Return token x from yylex, tracing it through ptoken() when ldbg is set. */
#define RETURN(x) {if (ldbg) ptoken(x); return(x); }

#define CBUFLEN 150

/*
 * Append one character to cbuf.  Every store into cbuf goes through here so
 * that clen never exceeds CBUFLEN-1: the closing-quote rule still needs room
 * for one extra blank and the terminating NUL.  On overflow the buffer is
 * terminated before being passed to yyerror() and the scanner drops back to
 * the INITIAL state.
 */
#define CPUT(c) \
        do { \
                cbuf[clen++] = (c); \
                if (clen >= CBUFLEN-1) { \
                        cbuf[clen] = 0; \
                        yyerror("string too long", cbuf); \
                        BEGIN INITIAL; \
                } \
        } while (0)

/* Append the character just matched. */
#define CADD CPUT(yytext[0])

char cbuf[CBUFLEN];     /* text of the string or character class in progress */
int clen, cflag;        /* bytes used in cbuf; cflag is 1 for a [^...] class */
%}

A       [a-zA-Z_]
B       [a-zA-Z0-9_]
D       [0-9]
WS      [ \t]

%%
        /*
         * A closing brace is handed to the parser as two tokens: the rule
         * that matches it returns ';' and sets sc_flag, and the next call
         * of yylex returns the brace itself from here.
         */
        static int sc_flag = 0;

        if ( sc_flag ) {
                BEGIN INITIAL;
                sc_flag = 0;
                RETURN('}');
        }

^\n             lineno++;
^{WS}*#.*\n     lineno++;       /* strip comment lines */
{WS}            ;
<INITIAL,reg>"\\"\n     lineno++;
"||"            RETURN(BOR);
BEGIN           RETURN(XBEGIN);
END             RETURN(XEND);
PROGEND         RETURN(EOF);
"&&"            RETURN(AND);
"!"             RETURN(NOT);
"!="            { yylval = NE; RETURN(RELOP); }
"~"             { yylval = MATCH; RETURN(MATCHOP); }
"!~"            { yylval = NOTMATCH; RETURN(MATCHOP); }
"<"             { yylval = LT; RETURN(RELOP); }
"<="            { yylval = LE; RETURN(RELOP); }
"=="            { yylval = EQ; RETURN(RELOP); }
">="            { yylval = GE; RETURN(RELOP); }
">"             { yylval = GT; RETURN(RELOP); }
">>"            { yylval = APPEND; RETURN(RELOP); }
"++"            { yylval = INCR; RETURN(INCR); }
"--"            { yylval = DECR; RETURN(DECR); }
"+="            { yylval = ADDEQ; RETURN(ASGNOP); }
"-="            { yylval = SUBEQ; RETURN(ASGNOP); }
"*="            { yylval = MULTEQ; RETURN(ASGNOP); }
"/="            { yylval = DIVEQ; RETURN(ASGNOP); }
"%="            { yylval = MODEQ; RETURN(ASGNOP); }
"="             { yylval = ASSIGN; RETURN(ASGNOP); }

"$"{D}+         {       /* field number 0 refers to the "$record" symbol */
                        if (atoi(yytext+1)==0) {
                                yylval = (hack)lookup("$record", symtab, 0);
                                RETURN(STRING);
                        } else {
                                yylval = fieldadr(atoi(yytext+1));
                                RETURN(FIELD);
                        }
                }
"$"{WS}*        { RETURN(INDIRECT); }
NF              { mustfld=1; yylval = (hack)setsymtab(yytext, EMPTY, 0.0, NUM, symtab); RETURN(VAR); }
({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?    {
                yylval = (hack)setsymtab(yytext, EMPTY, atof(yytext), CON|NUM, symtab); RETURN(NUMBER); }
"}"{WS}*\n      { sc_flag = 1; lineno++; RETURN(';'); }
"}"             { sc_flag = 1; RETURN(';'); }
;\n             { lineno++; RETURN(';'); }
\n              { lineno++; RETURN(NL); }
while           RETURN(WHILE);
for             RETURN(FOR);
if              RETURN(IF);
else            RETURN(ELSE);
next            RETURN(NEXT);
exit            RETURN(EXIT);
break           RETURN(BREAK);
continue        RETURN(CONTINUE);
print           { yylval = PRINT; RETURN(PRINT); }
printf          { yylval = PRINTF; RETURN(PRINTF); }
sprintf         { yylval = SPRINTF; RETURN(SPRINTF); }
split           { yylval = SPLIT; RETURN(SPLIT); }
substr          RETURN(SUBSTR);
index           RETURN(INDEX);
in              RETURN(IN);
getline         RETURN(GETLINE);
length          { yylval = FLENGTH; RETURN(FNCN); }
log             { yylval = FLOG; RETURN(FNCN); }
int             { yylval = FINT; RETURN(FNCN); }
exp             { yylval = FEXP; RETURN(FNCN); }
sqrt            { yylval = FSQRT; RETURN(FNCN); }
{A}{B}*         { yylval = (hack)setsymtab(yytext, tostring(""), 0.0, STR|NUM, symtab); RETURN(VAR); }
\"              { BEGIN str; clen=0; }

#               { BEGIN comment; }
<comment>\n     { BEGIN INITIAL; lineno++; RETURN(NL); }
<comment>.      ;

.               { yylval = yytext[0]; RETURN(yytext[0]); }

<reg>"["        { BEGIN chc; clen=0; cflag=0; }
<reg>"[^"       { BEGIN chc; clen=0; cflag=1; }

<reg>"?"        RETURN(QUEST);
<reg>"+"        RETURN(PLUS);
<reg>"*"        RETURN(STAR);
<reg>"|"        RETURN(OR);
<reg>"."        RETURN(DOT);
<reg>"("        RETURN('(');
<reg>")"        RETURN(')');
<reg>"^"        RETURN('^');
<reg>"$"        RETURN('$');
<reg>\\{D}{D}{D}        { sscanf(yytext+1, "%o", &yylval); RETURN(CHAR); }
<reg>\\.        {       if (yytext[1]=='n') yylval = '\n';
                        else if (yytext[1] == 't') yylval = '\t';
                        else yylval = yytext[1];
                        RETURN(CHAR);
                }
<reg>"/"        { /* push the slash back so INITIAL returns it as a token */
                  BEGIN INITIAL; unput('/'); }
<reg>\n         { yyerror("newline in regular expression"); lineno++; BEGIN INITIAL; }
<reg>.          { yylval = yytext[0]; RETURN(CHAR); }

<str>\"         {       char *s;
                        BEGIN INITIAL;
                        cbuf[clen]=0;
                        s = tostring(cbuf);
                        /* the symbol name is the text plus a trailing blank */
                        cbuf[clen] = ' '; cbuf[++clen] = 0;
                        yylval = (hack)setsymtab(cbuf, s, 0.0, CON|STR, symtab);
                        RETURN(STRING);
                }
<str>\n         { yyerror("newline in string"); lineno++; BEGIN INITIAL; }
<str>"\\\""     { CPUT('"'); }
<str,chc>"\\"n  { CPUT('\n'); }
<str,chc>"\\"t  { CPUT('\t'); }
<str,chc>"\\\\" { CPUT('\\'); }
<str>.          { CADD; }

<chc>"\\""]"    { CPUT(']'); }
<chc>"]"        { BEGIN reg; cbuf[clen]=0; yylval = (hack)tostring(cbuf);
                  if (cflag==0) { RETURN(CCL); }
                  else { RETURN(NCCL); } }
<chc>\n         { yyerror("newline in character class"); lineno++; BEGIN INITIAL; }
<chc>.          { CADD; }

%%

/*
 * Switch the scanner into the regular-expression start condition.
 */
startreg()
{
        BEGIN reg;
}

/*
 * Debugging aid: print token n on standard output.  Called from RETURN()
 * when ldbg is set.  Values below 128 are printed as a character, values
 * outside 257..LASTTOKEN-1 as "?" plus the value in octal, and everything
 * else by its name from the external tok[] table, followed by the matched
 * text or the character value for the token types listed in the switch.
 */
ptoken(n)
{
        extern struct tok {
                char *tnm;
                int yval;
        } tok[];
        extern int yylval;

        printf("lex:");
        if (n < 128) {
                printf(" %c\n",n);
                return;
        }
        if (n <= 256 || n >= LASTTOKEN) {
                printf("? %o\n",n);
                return;
        }
        /* tok[] is indexed from the first token number, 257 */
        printf(" %s",tok[n-257].tnm);
        switch (n) {

        case RELOP:
        case MATCHOP:
        case ASGNOP:
        case STRING:
        case FIELD:
        case VAR:
        case NUMBER:
        case FNCN:
                printf(" (%s)", yytext);
                break;

        case CHAR:
                printf(" (%o)", yylval);
                break;

        default:
                break;
        }
        putchar('\n');
}