/*	awk.lx.l	4.4	90/06/25	*/

%X str chc reg comment

%{
/*
 * Lexical analyzer for awk.
 *
 * Start conditions:
 *	INITIAL	ordinary program text
 *	reg	inside a regular expression (entered through startreg())
 *	chc	inside a [...] character class of a regular expression
 *	str	inside a "..." string constant
 *	comment	after a '#' that follows other text on the line
 */
#include <string.h>
#include "awk.h"
#include "awk.def"
extern int	yylval;
extern int	mustfld;
extern int	ldbg;
extern char	*lexprog;

/*
 * Scanner input.  When lexprog is non-null the program text is taken from
 * that string and lexprog is advanced past what was consumed; otherwise
 * input is read from yyin.  Exactly `result' bytes are stored in buf (no
 * terminating NUL is written), and a failed read() is reported through
 * YY_FATAL_ERROR rather than being handed back as a negative count.
 */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) \
	{ \
	if ( lexprog ) \
		{ \
		result = strlen( lexprog ); \
		if ( result > max_size ) \
			result = max_size; \
		memcpy( buf, lexprog, result ); \
		lexprog += result; \
		} \
	else \
		{ \
		int yy_nread = read( fileno(yyin), buf, max_size ); \
		if ( yy_nread < 0 ) \
			YY_FATAL_ERROR( "read() in awk scanner failed" ); \
		result = yy_nread; \
		} \
	}

int	lineno	= 1;

/* Return token x from yylex(), tracing it first when ldbg is set. */
#define RETURN(x)	{if (ldbg) ptoken(x); return(x); }

/* Collection buffer for the text of string constants and character classes. */
#define CBUFLEN	150
char	cbuf[CBUFLEN];
int	clen, cflag;	/* clen: chars collected; cflag: 1 for a negated [^...] class */

/*
 * Append character c to cbuf.  Every store into cbuf goes through here so
 * that escape sequences are bounds-checked the same way as ordinary
 * characters.  When the buffer fills up, the text is NUL-terminated,
 * reported with yyerror() and the scanner drops back to INITIAL.  The limit
 * of CBUFLEN-1 leaves room for the blank and NUL the <str>\" action appends.
 */
#define CPUT(c) \
	do { \
		cbuf[clen++] = (c); \
		if (clen >= CBUFLEN-1) { \
			cbuf[clen] = 0; \
			yyerror("string too long", cbuf); \
			BEGIN INITIAL; \
		} \
	} while (0)

/* Append the character just matched. */
#define CADD	CPUT(yytext[0])
%}

A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
WS	[ \t]

%%
	/*
	 * A closing brace is delivered as two tokens: the "}" rules below
	 * return ';' and set sc_flag, and the next call to yylex() lands
	 * here and returns the '}' itself.
	 */
	static int sc_flag = 0;

	if ( sc_flag ) {
		BEGIN INITIAL;
		sc_flag = 0;
		RETURN('}');
	}

^\n		lineno++;
^{WS}*#.*\n	lineno++;	/* strip comment lines */
{WS}		;
<INITIAL,reg>"\\"\n	lineno++;
"||"		RETURN(BOR);
BEGIN		RETURN(XBEGIN);
END		RETURN(XEND);
PROGEND		RETURN(EOF);
"&&"		RETURN(AND);
"!"		RETURN(NOT);
"!="		{ yylval = NE; RETURN(RELOP); }
"~"		{ yylval = MATCH; RETURN(MATCHOP); }
"!~"		{ yylval = NOTMATCH; RETURN(MATCHOP); }
"<"		{ yylval = LT; RETURN(RELOP); }
"<="		{ yylval = LE; RETURN(RELOP); }
"=="		{ yylval = EQ; RETURN(RELOP); }
">="		{ yylval = GE; RETURN(RELOP); }
">"		{ yylval = GT; RETURN(RELOP); }
">>"		{ yylval = APPEND; RETURN(RELOP); }
"++"		{ yylval = INCR; RETURN(INCR); }
"--"		{ yylval = DECR; RETURN(DECR); }
"+="		{ yylval = ADDEQ; RETURN(ASGNOP); }
"-="		{ yylval = SUBEQ; RETURN(ASGNOP); }
"*="		{ yylval = MULTEQ; RETURN(ASGNOP); }
"/="		{ yylval = DIVEQ; RETURN(ASGNOP); }
"%="		{ yylval = MODEQ; RETURN(ASGNOP); }
"="		{ yylval = ASSIGN; RETURN(ASGNOP); }

"$"{D}+		{	/* $0 is the whole record; $n (n > 0) is field n */
			int fldno = atoi(yytext+1);

			if (fldno == 0) {
				yylval = (hack)lookup("$record", symtab, 0);
				RETURN(STRING);
			} else {
				yylval = fieldadr(fldno);
				RETURN(FIELD);
			}
		}
"$"{WS}*	{ RETURN(INDIRECT); }
NF		{ mustfld=1; yylval = (hack)setsymtab(yytext, EMPTY, 0.0, NUM, symtab); RETURN(VAR); }
({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		yylval = (hack)setsymtab(yytext, EMPTY, atof(yytext), CON|NUM, symtab); RETURN(NUMBER); }
"}"{WS}*\n	{ sc_flag = 1; lineno++; RETURN(';'); }
"}"		{ sc_flag = 1; RETURN(';'); }
;\n		{ lineno++; RETURN(';'); }
\n		{ lineno++; RETURN(NL); }
while		RETURN(WHILE);
for		RETURN(FOR);
if		RETURN(IF);
else		RETURN(ELSE);
next		RETURN(NEXT);
exit		RETURN(EXIT);
break		RETURN(BREAK);
continue	RETURN(CONTINUE);
print		{ yylval = PRINT; RETURN(PRINT); }
printf		{ yylval = PRINTF; RETURN(PRINTF); }
sprintf		{ yylval = SPRINTF; RETURN(SPRINTF); }
split		{ yylval = SPLIT; RETURN(SPLIT); }
substr		RETURN(SUBSTR);
index		RETURN(INDEX);
in		RETURN(IN);
getline		RETURN(GETLINE);
length		{ yylval = FLENGTH; RETURN(FNCN); }
log		{ yylval = FLOG; RETURN(FNCN); }
int		{ yylval = FINT; RETURN(FNCN); }
exp		{ yylval = FEXP; RETURN(FNCN); }
sqrt		{ yylval = FSQRT; RETURN(FNCN); }
{A}{B}*		{ yylval = (hack)setsymtab(yytext, tostring(""), 0.0, STR|NUM, symtab); RETURN(VAR); }
\"		{ BEGIN str; clen=0; }

#		{ BEGIN comment; }
<comment>\n	{ BEGIN INITIAL; lineno++; RETURN(NL); }
<comment>.	;

.		{ yylval = yytext[0]; RETURN(yytext[0]); }

<reg>"["	{ BEGIN chc; clen=0; cflag=0; }
<reg>"[^"	{ BEGIN chc; clen=0; cflag=1; }

<reg>"?"	RETURN(QUEST);
<reg>"+"	RETURN(PLUS);
<reg>"*"	RETURN(STAR);
<reg>"|"	RETURN(OR);
<reg>"."	RETURN(DOT);
<reg>"("	RETURN('(');
<reg>")"	RETURN(')');
<reg>"^"	RETURN('^');
<reg>"$"	RETURN('$');
<reg>\\{D}{D}{D}	{ sscanf(yytext+1, "%o", &yylval); RETURN(CHAR); }
<reg>\\.	{	if (yytext[1]=='n') yylval = '\n';
			else if (yytext[1] == 't') yylval = '\t';
			else yylval = yytext[1];
			RETURN(CHAR);
		}
<reg>"/"	{ BEGIN INITIAL; unput('/'); }
<reg>\n		{ yyerror("newline in regular expression"); lineno++; BEGIN INITIAL; }
<reg>.		{ yylval = yytext[0]; RETURN(CHAR); }

<str>\"		{	/*
			 * End of string: the value is the collected text; the
			 * symbol-table name is the same text with a blank appended.
			 */
			char *s; BEGIN INITIAL; cbuf[clen]=0; s = tostring(cbuf);
			cbuf[clen] = ' '; cbuf[++clen] = 0;
			yylval = (hack)setsymtab(cbuf, s, 0.0, CON|STR, symtab); RETURN(STRING); }
<str>\n		{ yyerror("newline in string"); lineno++; BEGIN INITIAL; }
<str>"\\\""	{ CPUT('"'); }
<str,chc>"\\"n	{ CPUT('\n'); }
<str,chc>"\\"t	{ CPUT('\t'); }
<str,chc>"\\\\"	{ CPUT('\\'); }
<str>.		{ CADD; }

<chc>"\\""]"	{ CPUT(']'); }
<chc>"]"	{ BEGIN reg; cbuf[clen]=0; yylval = (hack)tostring(cbuf);
		if (cflag==0) { RETURN(CCL); }
		else { RETURN(NCCL); } }
<chc>\n		{ yyerror("newline in character class"); lineno++; BEGIN INITIAL; }
<chc>.		{ CADD; }

%%

/*
 * Put the scanner into the `reg' start condition so that the following
 * input is tokenized as a regular expression.
 */
startreg()
{
	BEGIN reg;
}

/*
 * Debug trace of token n, used by RETURN() when ldbg is set.  Prints
 * "lex:" followed by the token: values below 128 are shown as a character,
 * values up to 256 or at/after LASTTOKEN as "?" plus the value in octal,
 * and anything else by its name from tok[] (indexed from token 257).
 * Tokens whose text matters also show yytext; CHAR shows yylval in octal.
 */
ptoken(n)
{
	extern struct tok {
		char *tnm;
		int yval;
	} tok[];
	extern int yylval;

	printf("lex:");
	if (n < 128) {
		printf(" %c\n",n);
		return;
	}
	if (n <= 256 || n >= LASTTOKEN) {
		printf("? %o\n",n);
		return;
	}
	printf(" %s",tok[n-257].tnm);
	switch (n) {

	case RELOP:
	case MATCHOP:
	case ASGNOP:
	case STRING:
	case FIELD:
	case VAR:
	case NUMBER:
	case FNCN:
		printf(" (%s)", yytext);
		break;

	case CHAR:
		printf(" (%o)", yylval);
		break;
	}
	putchar('\n');
}